1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 84 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 85 86 /* gradients */ 87 ptr = work + 5*esize; 88 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 89 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 90 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 91 ierr = MatDestroy(&GE);CHKERRQ(ierr); 92 93 /* constants */ 94 ptr += rsize*csize; 95 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 96 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 97 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 98 ierr = MatDestroy(&GE);CHKERRQ(ierr); 99 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 100 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "PCBDDCNedelecSupport" 106 PetscErrorCode PCBDDCNedelecSupport(PC pc) 107 { 108 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 109 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 110 Mat G,T,conn,lG,lGt,lGis,lGall,lGe; 111 MatNullSpace nnsp; 112 Vec tvec,*quads; 113 PetscSF sfv; 114 ISLocalToGlobalMapping el2g,vl2g; 115 MPI_Comm comm; 116 IS lned,primals; 117 IS *eedges,*extrows,*extcols; 118 PetscBT btv,bte,btvc,btb,btvcand,btvi; 119 PetscScalar *vals,*work; 120 PetscReal *rwork; 121 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 122 PetscInt ne,nv,Ne,Nv,Le,Lv,order; 123 PetscInt n_neigh,*neigh,*n_shared,**shared; 124 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 125 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 126 PetscInt *sfvleaves,*sfvroots; 127 PetscBool print,eerr,done,lrc[2],conforming,ismpiaij; 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 /* test variable order code and print debug info TODO: to be removed */ 132 print = PETSC_FALSE; 133 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 134 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 135 136 /* Return to caller if there are no edges in the decomposition */ 137 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 138 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 139 ierr = ISLocalToGlobalMappingGetSize(el2g,&ne);CHKERRQ(ierr); 140 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 141 lrc[0] = PETSC_FALSE; 142 for (i=0;i<ne;i++) { 143 if (PetscRealPart(vals[i]) > 2.) { 144 lrc[0] = PETSC_TRUE; 145 break; 146 } 147 } 148 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 149 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 150 if (!lrc[1]) PetscFunctionReturn(0); 151 152 /* Get relevant objects */ 153 G = pcbddc->discretegradient; 154 order = pcbddc->nedorder; 155 conforming = pcbddc->conforming; 156 157 /* Compute local and global sizes of egde dofs and nodal dofs */ 158 ierr = MatGetSize(G,&Ne,&Nv);CHKERRQ(ierr); 159 ierr = MatGetLocalSize(G,&Le,&Lv);CHKERRQ(ierr); 160 161 /* Sanity checks */ 162 if (!order) SETERRQ(comm,PETSC_ERR_SUP,"Variable order not yet implemented"); 163 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 164 ierr = PetscObjectTypeCompare((PetscObject)G,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 165 if (!ismpiaij) SETERRQ1(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with discrete gradient of type %s. Please use MPIAIJ",((PetscObject)G)->type_name); 166 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 167 if (order && Ne%order) SETERRQ2(comm,PETSC_ERR_USER,"The number of edge dofs %d it's not a multiple of the order %d",Ne,order); 168 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 169 ierr = MatGetSize(pc->pmat,&i,NULL);CHKERRQ(ierr); 170 if (Ne != i) SETERRQ2(comm,PETSC_ERR_USER,"Global matrix sizes should match! Number of rows of G %d differs from that of A %d",Ne,i); 171 ierr = MatGetLocalSize(pc->pmat,&i,NULL);CHKERRQ(ierr); 172 if (Le != i) SETERRQ2(comm,PETSC_ERR_USER,"Local matrix sizes should match! Number of local rows of G %d differs from that of A %d",Le,i); 173 174 /* Drop connections for interior edges (this modifies G) */ 175 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 176 ierr = VecScatterBegin(matis->rctx,matis->counter,tvec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 177 ierr = VecScatterEnd(matis->rctx,matis->counter,tvec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 178 ierr = PetscMalloc1(Le,&marks);CHKERRQ(ierr); 179 ierr = VecGetOwnershipRange(tvec,&rst,NULL);CHKERRQ(ierr); 180 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&vals);CHKERRQ(ierr); 181 cum = 0; 182 for (i=0;i<Le;i++) if (PetscRealPart(vals[i]) < 1.5) marks[cum++] = i+rst; 183 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&vals);CHKERRQ(ierr); 184 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 185 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 186 ierr = MatZeroRows(G,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 187 ierr = PetscFree(marks);CHKERRQ(ierr); 188 189 /* Extract subdomain relevant rows of G */ 190 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 191 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 192 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 193 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 194 ierr = ISDestroy(&lned);CHKERRQ(ierr); 195 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 196 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 197 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 198 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 199 if (print) { 200 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 201 ierr = MatView(lG,NULL);CHKERRQ(ierr); 202 } 203 204 /* SF for nodal communications */ 205 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 206 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 207 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 208 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 209 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 210 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 211 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 212 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 213 214 /* Destroy temporary G created in MATIS format */ 215 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 216 217 /* Analyze the edge-nodes connections (duplicate lG) */ 218 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 219 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 220 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 221 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 222 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 223 /* need to import the boundary specification to ensure the 224 proper detection of coarse edges' endpoints */ 225 if (pcbddc->DirichletBoundariesLocal) { 226 ierr = ISGetLocalSize(pcbddc->DirichletBoundariesLocal,&cum);CHKERRQ(ierr); 227 ierr = ISGetIndices(pcbddc->DirichletBoundariesLocal,&idxs);CHKERRQ(ierr); 228 for (i=0;i<cum;i++) { 229 if (idxs[i] >= 0) { 230 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 231 } 232 } 233 ierr = ISRestoreIndices(pcbddc->DirichletBoundariesLocal,&idxs);CHKERRQ(ierr); 234 } 235 if (pcbddc->NeumannBoundariesLocal) { 236 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&cum);CHKERRQ(ierr); 237 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 238 for (i=0;i<cum;i++) { 239 if (idxs[i] >= 0) { 240 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 241 } 242 } 243 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 244 } 245 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 246 /* need to remove coarse faces' dofs to ensure the 247 proper detection of coarse edges' endpoints */ 248 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 249 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 250 for (i=1;i<n_neigh;i++) { 251 PetscInt j; 252 for (j=0;j<n_shared[i];j++) marks[shared[i][j]]++; 253 } 254 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 255 cum = 0; 256 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 257 for (i=0;i<ne;i++) { 258 if (!marks[i] || (marks[i] == 1 && !PetscBTLookup(btb,i))) { /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 259 marks[cum++] = i; 260 } else if (!conforming && ii[i+1]-ii[i] != order + 1) { /* set badly connected edge dofs as primal */ 261 marks[cum++] = i; 262 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 263 for (j=ii[i];j<ii[i+1];j++) { 264 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 265 } 266 } 267 } 268 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 269 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 270 /* identify splitpoints and corner candidates: TODO variable order */ 271 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 272 if (print) { 273 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 274 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 275 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 276 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 277 } 278 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 279 for (i=0;i<nv;i++) { 280 #if defined(PETSC_USE_DEBUG) 281 if ((ii[i+1]-ii[i])%order) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",ii[i+1]-ii[i],i,order); 282 #endif 283 if (ii[i+1]-ii[i] >= 3*order) { /* splitpoints */ 284 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 285 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 286 } else if (ii[i+1]-ii[i] == order) { 287 if (order == 1) { 288 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 289 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 290 } else { 291 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 292 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 293 } 294 } 295 } 296 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 297 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 298 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 299 300 /* Get the local G^T explicitly */ 301 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 302 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 303 if (print) { 304 ierr = PetscObjectSetName((PetscObject)lGt,"initial_lGt");CHKERRQ(ierr); 305 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 306 } 307 308 /* Mark interior nodal dofs */ 309 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 310 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 311 for (i=1;i<n_neigh;i++) { 312 for (j=0;j<n_shared[i];j++) { 313 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 314 } 315 } 316 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 317 318 /* communicate corners and splitpoints */ 319 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 320 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 321 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 322 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 323 324 if (print) { 325 IS tbz; 326 327 cum = 0; 328 for (i=0;i<nv;i++) 329 if (sfvleaves[i]) 330 vmarks[cum++] = i; 331 332 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 333 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 334 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 335 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 336 } 337 338 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 339 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 340 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 341 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 342 343 /* Zero rows of lGt corresponding to identified corners 344 and interior nodal dofs */ 345 cum = 0; 346 for (i=0;i<nv;i++) { 347 if (sfvleaves[i]) { 348 vmarks[cum++] = i; 349 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 350 } 351 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 352 } 353 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 354 if (print) { 355 IS tbz; 356 357 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 358 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 359 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 360 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 361 } 362 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 363 ierr = PetscFree(vmarks);CHKERRQ(ierr); 364 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 365 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 366 367 /* Recompute G */ 368 ierr = MatDestroy(&lG);CHKERRQ(ierr); 369 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 370 if (print) { 371 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 372 ierr = MatView(lG,NULL);CHKERRQ(ierr); 373 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 374 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 375 } 376 377 /* Get primal dofs (if any) */ 378 cum = 0; 379 for (i=0;i<ne;i++) { 380 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 381 } 382 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 383 if (print) { 384 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 385 ierr = ISView(primals,NULL);CHKERRQ(ierr); 386 } 387 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 388 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 389 ierr = ISDestroy(&primals);CHKERRQ(ierr); 390 391 /* Compute edge connectivity */ 392 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 393 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 394 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 395 ierr = PCBDDCSetLocalAdjacencyGraph(pc,ne,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 396 397 /* Analyze interface for edge dofs */ 398 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 399 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 400 401 /* Get coarse edges in the edge space */ 402 pcbddc->mat_graph->twodim = PETSC_FALSE; 403 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 404 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 405 406 /* Mark fine edge dofs with their coarse edge id */ 407 maxsize = 0; 408 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 409 for (i=0;i<nee;i++) { 410 PetscInt size,mark = i+1; 411 412 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 413 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 414 for (j=0;j<size;j++) marks[idxs[j]] = mark; 415 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 416 maxsize = PetscMax(maxsize,size); 417 } 418 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 419 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 420 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 421 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 422 if (print) { 423 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 424 ierr = ISView(primals,NULL);CHKERRQ(ierr); 425 } 426 427 /* Find coarse edge endpoints */ 428 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 429 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 430 for (i=0;i<nee;i++) { 431 PetscInt mark = i+1,size; 432 433 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 434 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 435 if (print) { 436 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 437 ISView(eedges[i],NULL); 438 } 439 for (j=0;j<size;j++) { 440 PetscInt k, ee = idxs[j]; 441 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 442 for (k=ii[ee];k<ii[ee+1];k++) { 443 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 444 if (PetscBTLookup(btv,jj[k])) { 445 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 446 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 447 if (print) PetscPrintf(PETSC_COMM_SELF," candidate? %d\n",(PetscBool)(PetscBTLookup(btvcand,jj[k]))); 448 PetscInt k2; 449 PetscBool corner = PETSC_FALSE; 450 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 451 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: count %d, mark %d (ref mark %d), special %d\n",jjt[k2],pcbddc->mat_graph->count[jjt[k2]],marks[jjt[k2]],mark,pcbddc->mat_graph->special_dof[jjt[k2]]); 452 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || 453 pcbddc->mat_graph->special_dof[jjt[k2]] == PCBDDCGRAPH_DIRICHLET_MARK) { 454 corner = PETSC_TRUE; 455 break; 456 } 457 } 458 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 459 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 460 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 461 } else { 462 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 463 } 464 } 465 } 466 } 467 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 468 } 469 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 470 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 471 472 /* Reset marked primal dofs */ 473 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 474 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 475 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 476 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 477 478 /* Compute extended cols indices */ 479 ierr = MatGetRowIJ(lG ,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 480 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 481 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 482 i *= maxsize; 483 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 484 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 485 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 486 eerr = PETSC_FALSE; 487 for (i=0;i<nee;i++) { 488 PetscInt size; 489 490 cum = 0; 491 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 492 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 493 for (j=0;j<size;j++) { 494 PetscInt k,ee = idxs[j]; 495 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 496 } 497 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 498 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 499 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 500 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 501 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 502 /* it may happen that endpoints are not defined at this point 503 if it is the case, mark this edge for a second pass */ 504 if (cum != size -1) { 505 emarks[i] = 1; 506 if (print) { 507 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 508 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 509 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 510 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 511 } 512 eerr = PETSC_TRUE; 513 } 514 } 515 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 516 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 517 if (done) { 518 PetscInt *newprimals; 519 520 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 521 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 522 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 523 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 524 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 525 for (i=0;i<nee;i++) { 526 if (emarks[i]) { 527 PetscInt size,mark = i+1; 528 529 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 530 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 531 for (j=0;j<size;j++) { 532 /* newprimals[cum++] = idxs[j]; */ 533 PetscInt k,ee = idxs[j]; 534 for (k=ii[ee];k<ii[ee+1];k++) { 535 /* set all candidates located on the edge as corners */ 536 if (PetscBTLookup(btvcand,jj[k])) { 537 PetscInt k2,vv = jj[k]; 538 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 539 /* set all edge dofs connected to candidate as primals */ 540 for (k2=iit[vv];k2<iit[vv+1];k2++) { 541 if (marks[jjt[k2]] == mark) { 542 PetscInt k3,ee2 = jjt[k2]; 543 newprimals[cum++] = ee2; 544 /* finally set the new corners */ 545 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 546 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 547 } 548 } 549 } 550 } 551 } 552 } 553 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 554 } 555 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 556 } 557 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 558 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 559 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 560 ierr = PetscFree(newprimals);CHKERRQ(ierr); 561 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 562 ierr = ISDestroy(&primals);CHKERRQ(ierr); 563 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 564 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 565 566 /* Mark again */ 567 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 568 for (i=0;i<nee;i++) { 569 PetscInt size,mark = i+1; 570 571 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 572 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 573 for (j=0;j<size;j++) marks[idxs[j]] = mark; 574 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 575 } 576 if (print) { 577 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 578 ierr = ISView(primals,NULL);CHKERRQ(ierr); 579 } 580 581 /* Recompute extended cols */ 582 eerr = PETSC_FALSE; 583 for (i=0;i<nee;i++) { 584 PetscInt size; 585 586 cum = 0; 587 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 588 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 589 for (j=0;j<size;j++) { 590 PetscInt k,ee = idxs[j]; 591 for (k=ii[ee];k<ii[ee+1];k++) { 592 if (!PetscBTLookup(btv,jj[k])) { 593 extrow[cum++] = jj[k]; 594 } 595 } 596 } 597 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 598 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 599 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 600 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 601 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 602 if (cum != size -1) { 603 if (print) { 604 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 605 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 606 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 607 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 608 } 609 eerr = PETSC_TRUE; 610 } 611 } 612 } 613 ierr = MatRestoreRowIJ( lG,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 614 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 615 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 616 ierr = PetscFree(emarks);CHKERRQ(ierr); 617 /* an error should not occur at this point */ 618 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 619 620 /* Check the number of endpoints */ 621 /* TODO: add case for circular edge */ 622 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 623 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 624 for (i=0;i<nee;i++) { 625 PetscInt size, found = 0; 626 627 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 628 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 629 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 630 for (j=0;j<size;j++) { 631 PetscInt k,ee = idxs[j]; 632 for (k=ii[ee];k<ii[ee+1];k++) { 633 PetscInt vv = jj[k]; 634 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 635 found++; 636 } 637 } 638 } 639 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 640 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 641 } 642 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 643 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 644 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 645 646 #if defined(PETSC_USE_DEBUG) 647 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 648 not interfere with neighbouring coarse edges */ 649 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 650 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 651 for (i=0;i<nv;i++) { 652 PetscInt emax = 0,eemax = 0; 653 654 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 655 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 656 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 657 for (j=1;j<nee+1;j++) { 658 if (emax < emarks[j]) { 659 emax = emarks[j]; 660 eemax = j; 661 } 662 } 663 /* not relevant for edges */ 664 if (!eemax) continue; 665 666 for (j=ii[i];j<ii[i+1];j++) { 667 if (marks[jj[j]] && marks[jj[j]] != eemax) { 668 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dod %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 669 } 670 } 671 } 672 ierr = PetscFree(emarks);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 #endif 675 676 /* Compute extended rows indices for edge blocks of the change of basis */ 677 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 678 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 679 extmem *= maxsize; 680 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 681 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 682 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 683 for (i=0;i<nv;i++) { 684 PetscInt mark = 0,size,start; 685 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 686 for (j=ii[i];j<ii[i+1];j++) 687 if (marks[jj[j]] && !mark) 688 mark = marks[jj[j]]; 689 690 /* not relevant */ 691 if (!mark) continue; 692 693 /* import extended row */ 694 mark--; 695 start = mark*extmem+extrowcum[mark]; 696 size = ii[i+1]-ii[i]; 697 #if defined(PETSC_USE_DEBUG) 698 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 699 #endif 700 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 701 extrowcum[mark] += size; 702 } 703 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 704 cum = 0; 705 for (i=0;i<nee;i++) { 706 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 707 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 708 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 709 cum = PetscMax(cum,size); 710 } 711 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 712 ierr = PetscFree(marks);CHKERRQ(ierr); 713 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 714 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 715 716 /* Workspace for lapack inner calls and VecSetValues */ 717 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 718 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 719 for (i=0;i<maxsize;i++) vals[i] = 1.; 720 721 /* Create vectors for quadrature rules */ 722 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 723 for (i=0;i<nquads;i++) { 724 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 725 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 726 } 727 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 728 729 /* Create change of basis matrix (preallocation can be improved) */ 730 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 731 ierr = MatSetSizes(T,Le,Le,Ne,Ne);CHKERRQ(ierr); 732 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 733 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 734 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 735 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 736 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 737 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 738 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 739 740 /* Defaults to identity */ 741 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 742 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 743 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 744 745 for (i=0;i<nee;i++) { 746 Mat Gins = NULL, GKins = NULL; 747 748 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 749 if (Gins && GKins) { 750 PetscScalar *data; 751 const PetscInt *rows,*cols; 752 PetscInt nrh,nch,nrc,ncc; 753 754 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 755 /* H1 */ 756 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 757 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 758 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 759 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 760 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 761 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 762 /* complement */ 763 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 764 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 765 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 766 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 767 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 768 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 769 /* Gins kernel quadratures */ 770 for (j=0;j<ncc;j++) { 771 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 772 } 773 /* H1 average */ 774 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 775 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 776 } 777 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 778 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 779 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 780 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 781 } 782 783 /* Start assembling */ 784 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 785 for (i=0;i<nquads;i++) { 786 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 787 } 788 789 /* Free */ 790 ierr = PetscFree(extrow);CHKERRQ(ierr); 791 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 792 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 793 ierr = PetscFree(vals);CHKERRQ(ierr); 794 ierr = PetscFree(extrows);CHKERRQ(ierr); 795 ierr = PetscFree(extcols);CHKERRQ(ierr); 796 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 797 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 798 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 799 ierr = MatDestroy(&lG);CHKERRQ(ierr); 800 ierr = MatDestroy(&conn);CHKERRQ(ierr); 801 802 /* Complete assembling */ 803 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 804 for (i=0;i<nquads;i++) { 805 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 806 } 807 for (i=0;i<nquads;i++) { 808 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(quads);CHKERRQ(ierr); 811 812 /* tell PCBDDC the topography has been analyzed */ 813 pcbddc->recompute_topography = PETSC_FALSE; 814 815 /* set change of basis */ 816 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 817 ierr = MatDestroy(&T);CHKERRQ(ierr); 818 819 /* set quadratures */ 820 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 821 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 822 823 PetscFunctionReturn(0); 824 } 825 826 /* the near-null space of BDDC carries information on quadrature weights, 827 and these can be collinear -> so cheat with MatNullSpaceCreate 828 and create a suitable set of basis vectors first */ 829 #undef __FUNCT__ 830 #define __FUNCT__ "PCBDDCNullSpaceCreate" 831 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 832 { 833 PetscErrorCode ierr; 834 PetscInt i; 835 836 PetscFunctionBegin; 837 for (i=0;i<nvecs;i++) { 838 PetscInt first,last; 839 840 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 841 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 842 if (i>=first && i < last) { 843 PetscScalar *data; 844 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 845 if (!has_const) { 846 data[i-first] = 1.; 847 } else { 848 data[2*i-first] = 1./PetscSqrtReal(2.); 849 data[2*i-first+1] = -1./PetscSqrtReal(2.); 850 } 851 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 852 } 853 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 854 } 855 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 856 for (i=0;i<nvecs;i++) { /* reset vectors */ 857 PetscInt first,last; 858 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 859 if (i>=first && i < last) { 860 PetscScalar *data; 861 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 862 if (!has_const) { 863 data[i-first] = 0.; 864 } else { 865 data[2*i-first] = 0.; 866 data[2*i-first+1] = 0.; 867 } 868 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 869 } 870 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 871 } 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNCT__ 876 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 877 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 878 { 879 Mat loc_divudotp; 880 Vec p,v,vins,quad_vec,*quad_vecs; 881 ISLocalToGlobalMapping map; 882 IS *faces,*edges; 883 PetscScalar *vals; 884 const PetscScalar *array; 885 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 886 PetscMPIInt rank; 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 891 if (graph->twodim) { 892 lmaxneighs = 2; 893 } else { 894 lmaxneighs = 1; 895 for (i=0;i<ne;i++) { 896 const PetscInt *idxs; 897 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 898 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 899 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 900 } 901 lmaxneighs++; /* graph count does not include self */ 902 } 903 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 904 maxsize = 0; 905 for (i=0;i<ne;i++) { 906 PetscInt nn; 907 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 908 maxsize = PetscMax(maxsize,nn); 909 } 910 for (i=0;i<nf;i++) { 911 PetscInt nn; 912 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 913 maxsize = PetscMax(maxsize,nn); 914 } 915 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 916 /* create vectors to hold quadrature weights */ 917 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 918 if (!transpose) { 919 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 920 } else { 921 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 922 } 923 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 924 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 925 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 926 for (i=0;i<maxneighs;i++) { 927 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 928 } 929 930 /* compute local quad vec */ 931 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 932 if (!transpose) { 933 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 934 } else { 935 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 936 } 937 ierr = VecSet(p,1.);CHKERRQ(ierr); 938 if (!transpose) { 939 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 940 } else { 941 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 942 } 943 if (vl2l) { 944 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 945 } else { 946 vins = v; 947 } 948 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 949 ierr = VecDestroy(&p);CHKERRQ(ierr); 950 951 /* insert in global quadrature vecs */ 952 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 953 for (i=0;i<nf;i++) { 954 const PetscInt *idxs; 955 PetscInt idx,nn,j; 956 957 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 958 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 959 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 960 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 961 idx = -(idx+1); 962 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 963 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 964 } 965 for (i=0;i<ne;i++) { 966 const PetscInt *idxs; 967 PetscInt idx,nn,j; 968 969 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 970 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 971 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 972 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 973 idx = -(idx+1); 974 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 975 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 976 } 977 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 978 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 979 if (vl2l) { 980 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 981 } 982 ierr = VecDestroy(&v);CHKERRQ(ierr); 983 ierr = PetscFree(vals);CHKERRQ(ierr); 984 985 /* assemble near null space */ 986 for (i=0;i<maxneighs;i++) { 987 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 988 } 989 for (i=0;i<maxneighs;i++) { 990 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 991 } 992 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 999 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1000 { 1001 PetscErrorCode ierr; 1002 Vec local,global; 1003 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1004 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1005 1006 PetscFunctionBegin; 1007 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1008 /* need to convert from global to local topology information and remove references to information in global ordering */ 1009 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1010 if (pcbddc->user_provided_isfordofs) { 1011 if (pcbddc->n_ISForDofs) { 1012 PetscInt i; 1013 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1014 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1015 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1016 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1017 } 1018 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1019 pcbddc->n_ISForDofs = 0; 1020 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1021 } 1022 } else { 1023 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1024 PetscInt i, n = matis->A->rmap->n; 1025 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1026 if (i > 1) { 1027 pcbddc->n_ISForDofsLocal = i; 1028 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1029 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1030 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1031 } 1032 } 1033 } 1034 } 1035 1036 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1037 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1038 } 1039 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1040 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1041 } 1042 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1043 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1044 } 1045 ierr = VecDestroy(&global);CHKERRQ(ierr); 1046 ierr = VecDestroy(&local);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1052 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1053 { 1054 PC_IS *pcis = (PC_IS*)(pc->data); 1055 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1056 PetscErrorCode ierr; 1057 1058 PetscFunctionBegin; 1059 if (!pcbddc->benign_have_null) { 1060 PetscFunctionReturn(0); 1061 } 1062 if (pcbddc->ChangeOfBasisMatrix) { 1063 Vec swap; 1064 1065 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1066 swap = pcbddc->work_change; 1067 pcbddc->work_change = r; 1068 r = swap; 1069 } 1070 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1071 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1072 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1073 ierr = VecSet(z,0.);CHKERRQ(ierr); 1074 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1075 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1076 if (pcbddc->ChangeOfBasisMatrix) { 1077 Vec swap; 1078 1079 swap = r; 1080 r = pcbddc->work_change; 1081 pcbddc->work_change = swap; 1082 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1083 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1090 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1091 { 1092 PCBDDCBenignMatMult_ctx ctx; 1093 PetscErrorCode ierr; 1094 PetscBool apply_right,apply_left,reset_x; 1095 1096 PetscFunctionBegin; 1097 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1098 if (transpose) { 1099 apply_right = ctx->apply_left; 1100 apply_left = ctx->apply_right; 1101 } else { 1102 apply_right = ctx->apply_right; 1103 apply_left = ctx->apply_left; 1104 } 1105 reset_x = PETSC_FALSE; 1106 if (apply_right) { 1107 const PetscScalar *ax; 1108 PetscInt nl,i; 1109 1110 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1111 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1112 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1113 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1114 for (i=0;i<ctx->benign_n;i++) { 1115 PetscScalar sum,val; 1116 const PetscInt *idxs; 1117 PetscInt nz,j; 1118 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1119 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1120 sum = 0.; 1121 if (ctx->apply_p0) { 1122 val = ctx->work[idxs[nz-1]]; 1123 for (j=0;j<nz-1;j++) { 1124 sum += ctx->work[idxs[j]]; 1125 ctx->work[idxs[j]] += val; 1126 } 1127 } else { 1128 for (j=0;j<nz-1;j++) { 1129 sum += ctx->work[idxs[j]]; 1130 } 1131 } 1132 ctx->work[idxs[nz-1]] -= sum; 1133 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1134 } 1135 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1136 reset_x = PETSC_TRUE; 1137 } 1138 if (transpose) { 1139 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1140 } else { 1141 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1142 } 1143 if (reset_x) { 1144 ierr = VecResetArray(x);CHKERRQ(ierr); 1145 } 1146 if (apply_left) { 1147 PetscScalar *ay; 1148 PetscInt i; 1149 1150 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1151 for (i=0;i<ctx->benign_n;i++) { 1152 PetscScalar sum,val; 1153 const PetscInt *idxs; 1154 PetscInt nz,j; 1155 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1156 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1157 val = -ay[idxs[nz-1]]; 1158 if (ctx->apply_p0) { 1159 sum = 0.; 1160 for (j=0;j<nz-1;j++) { 1161 sum += ay[idxs[j]]; 1162 ay[idxs[j]] += val; 1163 } 1164 ay[idxs[nz-1]] += sum; 1165 } else { 1166 for (j=0;j<nz-1;j++) { 1167 ay[idxs[j]] += val; 1168 } 1169 ay[idxs[nz-1]] = 0.; 1170 } 1171 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1172 } 1173 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1174 } 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1180 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1181 { 1182 PetscErrorCode ierr; 1183 1184 PetscFunctionBegin; 1185 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1191 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1192 { 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "PCBDDCBenignShellMat" 1202 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1203 { 1204 PC_IS *pcis = (PC_IS*)pc->data; 1205 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1206 PCBDDCBenignMatMult_ctx ctx; 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 if (!restore) { 1211 Mat A_IB,A_BI; 1212 PetscScalar *work; 1213 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1214 1215 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1216 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1217 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1218 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1219 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1220 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1221 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1222 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1223 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1224 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1225 ctx->apply_left = PETSC_TRUE; 1226 ctx->apply_right = PETSC_FALSE; 1227 ctx->apply_p0 = PETSC_FALSE; 1228 ctx->benign_n = pcbddc->benign_n; 1229 if (reuse) { 1230 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1231 ctx->free = PETSC_FALSE; 1232 } else { /* TODO: could be optimized for successive solves */ 1233 ISLocalToGlobalMapping N_to_D; 1234 PetscInt i; 1235 1236 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1237 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1238 for (i=0;i<pcbddc->benign_n;i++) { 1239 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1240 } 1241 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1242 ctx->free = PETSC_TRUE; 1243 } 1244 ctx->A = pcis->A_IB; 1245 ctx->work = work; 1246 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1247 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1248 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1249 pcis->A_IB = A_IB; 1250 1251 /* A_BI as A_IB^T */ 1252 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1253 pcbddc->benign_original_mat = pcis->A_BI; 1254 pcis->A_BI = A_BI; 1255 } else { 1256 if (!pcbddc->benign_original_mat) { 1257 PetscFunctionReturn(0); 1258 } 1259 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1260 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1261 pcis->A_IB = ctx->A; 1262 ctx->A = NULL; 1263 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1264 pcis->A_BI = pcbddc->benign_original_mat; 1265 pcbddc->benign_original_mat = NULL; 1266 if (ctx->free) { 1267 PetscInt i; 1268 for (i=0;i<ctx->benign_n;i++) { 1269 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1270 } 1271 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1272 } 1273 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1274 ierr = PetscFree(ctx);CHKERRQ(ierr); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 /* used just in bddc debug mode */ 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "PCBDDCBenignProject" 1282 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1283 { 1284 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1285 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1286 Mat An; 1287 PetscErrorCode ierr; 1288 1289 PetscFunctionBegin; 1290 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1291 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1292 if (is1) { 1293 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1294 ierr = MatDestroy(&An);CHKERRQ(ierr); 1295 } else { 1296 *B = An; 1297 } 1298 PetscFunctionReturn(0); 1299 } 1300 1301 /* TODO: add reuse flag */ 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatSeqAIJCompress" 1304 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1305 { 1306 Mat Bt; 1307 PetscScalar *a,*bdata; 1308 const PetscInt *ii,*ij; 1309 PetscInt m,n,i,nnz,*bii,*bij; 1310 PetscBool flg_row; 1311 PetscErrorCode ierr; 1312 1313 PetscFunctionBegin; 1314 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1315 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1316 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1317 nnz = n; 1318 for (i=0;i<ii[n];i++) { 1319 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1320 } 1321 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1322 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1323 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1324 nnz = 0; 1325 bii[0] = 0; 1326 for (i=0;i<n;i++) { 1327 PetscInt j; 1328 for (j=ii[i];j<ii[i+1];j++) { 1329 PetscScalar entry = a[j]; 1330 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1331 bij[nnz] = ij[j]; 1332 bdata[nnz] = entry; 1333 nnz++; 1334 } 1335 } 1336 bii[i+1] = nnz; 1337 } 1338 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1339 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1340 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1341 { 1342 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1343 b->free_a = PETSC_TRUE; 1344 b->free_ij = PETSC_TRUE; 1345 } 1346 *B = Bt; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatDetectDisconnectedComponents" 1352 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1353 { 1354 Mat B; 1355 IS is_dummy,*cc_n; 1356 ISLocalToGlobalMapping l2gmap_dummy; 1357 PCBDDCGraph graph; 1358 PetscInt i,n; 1359 PetscInt *xadj,*adjncy; 1360 PetscInt *xadj_filtered,*adjncy_filtered; 1361 PetscBool flg_row,isseqaij; 1362 PetscErrorCode ierr; 1363 1364 PetscFunctionBegin; 1365 if (!A->rmap->N || !A->cmap->N) { 1366 *ncc = 0; 1367 *cc = NULL; 1368 PetscFunctionReturn(0); 1369 } 1370 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1371 if (!isseqaij && filter) { 1372 PetscBool isseqdense; 1373 1374 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1375 if (!isseqdense) { 1376 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1377 } else { /* TODO: rectangular case and LDA */ 1378 PetscScalar *array; 1379 PetscReal chop=1.e-6; 1380 1381 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1382 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1383 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1384 for (i=0;i<n;i++) { 1385 PetscInt j; 1386 for (j=i+1;j<n;j++) { 1387 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1388 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1389 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1390 } 1391 } 1392 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1393 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1394 } 1395 } else { 1396 B = A; 1397 } 1398 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1399 1400 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1401 if (filter) { 1402 PetscScalar *data; 1403 PetscInt j,cum; 1404 1405 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1406 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1407 cum = 0; 1408 for (i=0;i<n;i++) { 1409 PetscInt t; 1410 1411 for (j=xadj[i];j<xadj[i+1];j++) { 1412 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1413 continue; 1414 } 1415 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1416 } 1417 t = xadj_filtered[i]; 1418 xadj_filtered[i] = cum; 1419 cum += t; 1420 } 1421 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1422 } else { 1423 xadj_filtered = NULL; 1424 adjncy_filtered = NULL; 1425 } 1426 1427 /* compute local connected components using PCBDDCGraph */ 1428 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1429 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1430 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1431 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1432 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1433 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1434 if (xadj_filtered) { 1435 graph->xadj = xadj_filtered; 1436 graph->adjncy = adjncy_filtered; 1437 } else { 1438 graph->xadj = xadj; 1439 graph->adjncy = adjncy; 1440 } 1441 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1442 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1443 /* partial clean up */ 1444 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1445 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1446 if (A != B) { 1447 ierr = MatDestroy(&B);CHKERRQ(ierr); 1448 } 1449 1450 /* get back data */ 1451 if (ncc) *ncc = graph->ncc; 1452 if (cc) { 1453 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1454 for (i=0;i<graph->ncc;i++) { 1455 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1456 } 1457 *cc = cc_n; 1458 } 1459 /* clean up graph */ 1460 graph->xadj = 0; 1461 graph->adjncy = 0; 1462 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "PCBDDCBenignCheck" 1468 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1469 { 1470 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1471 PC_IS* pcis = (PC_IS*)(pc->data); 1472 IS dirIS = NULL; 1473 PetscInt i; 1474 PetscErrorCode ierr; 1475 1476 PetscFunctionBegin; 1477 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1478 if (zerodiag) { 1479 Mat A; 1480 Vec vec3_N; 1481 PetscScalar *vals; 1482 const PetscInt *idxs; 1483 PetscInt nz,*count; 1484 1485 /* p0 */ 1486 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1487 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1488 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1489 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1490 for (i=0;i<nz;i++) vals[i] = 1.; 1491 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1492 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1493 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1494 /* v_I */ 1495 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1496 for (i=0;i<nz;i++) vals[i] = 0.; 1497 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1498 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1499 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1500 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1501 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1502 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1503 if (dirIS) { 1504 PetscInt n; 1505 1506 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1507 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1508 for (i=0;i<n;i++) vals[i] = 0.; 1509 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1510 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1511 } 1512 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1513 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1514 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1515 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1516 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1517 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1518 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1519 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1520 ierr = PetscFree(vals);CHKERRQ(ierr); 1521 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1522 1523 /* there should not be any pressure dofs lying on the interface */ 1524 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1525 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1526 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1527 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1528 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1529 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1530 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1531 ierr = PetscFree(count);CHKERRQ(ierr); 1532 } 1533 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1534 1535 /* check PCBDDCBenignGetOrSetP0 */ 1536 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1537 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1538 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1539 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1540 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1541 for (i=0;i<pcbddc->benign_n;i++) { 1542 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1543 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1544 } 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1550 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1551 { 1552 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1553 IS pressures,zerodiag,*zerodiag_subs; 1554 PetscInt nz,n; 1555 PetscInt *interior_dofs,n_interior_dofs; 1556 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1557 PetscErrorCode ierr; 1558 1559 PetscFunctionBegin; 1560 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1561 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1562 for (n=0;n<pcbddc->benign_n;n++) { 1563 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1564 } 1565 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1566 pcbddc->benign_n = 0; 1567 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1568 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1569 Checks if all the pressure dofs in each subdomain have a zero diagonal 1570 If not, a change of basis on pressures is not needed 1571 since the local Schur complements are already SPD 1572 */ 1573 has_null_pressures = PETSC_TRUE; 1574 have_null = PETSC_TRUE; 1575 if (pcbddc->n_ISForDofsLocal) { 1576 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1577 1578 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1579 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1580 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1581 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1582 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1583 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1584 if (!sorted) { 1585 ierr = ISSort(pressures);CHKERRQ(ierr); 1586 } 1587 } else { 1588 pressures = NULL; 1589 } 1590 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1591 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1592 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1593 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1594 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1595 if (!sorted) { 1596 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1597 } 1598 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1599 if (!nz) { 1600 if (n) have_null = PETSC_FALSE; 1601 has_null_pressures = PETSC_FALSE; 1602 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1603 } 1604 recompute_zerodiag = PETSC_FALSE; 1605 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1606 zerodiag_subs = NULL; 1607 pcbddc->benign_n = 0; 1608 n_interior_dofs = 0; 1609 interior_dofs = NULL; 1610 if (pcbddc->current_level) { /* need to compute interior nodes */ 1611 PetscInt n,i,j; 1612 PetscInt n_neigh,*neigh,*n_shared,**shared; 1613 PetscInt *iwork; 1614 1615 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1616 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1617 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1618 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1619 for (i=1;i<n_neigh;i++) 1620 for (j=0;j<n_shared[i];j++) 1621 iwork[shared[i][j]] += 1; 1622 for (i=0;i<n;i++) 1623 if (!iwork[i]) 1624 interior_dofs[n_interior_dofs++] = i; 1625 ierr = PetscFree(iwork);CHKERRQ(ierr); 1626 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1627 } 1628 if (has_null_pressures) { 1629 IS *subs; 1630 PetscInt nsubs,i,j,nl; 1631 const PetscInt *idxs; 1632 PetscScalar *array; 1633 Vec *work; 1634 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1635 1636 subs = pcbddc->local_subs; 1637 nsubs = pcbddc->n_local_subs; 1638 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1639 if (pcbddc->current_level) { 1640 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1641 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1642 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1643 /* work[0] = 1_p */ 1644 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1645 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1646 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1647 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1648 /* work[0] = 1_v */ 1649 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1650 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1651 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1652 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1653 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1654 } 1655 if (nsubs > 1) { 1656 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1657 for (i=0;i<nsubs;i++) { 1658 ISLocalToGlobalMapping l2g; 1659 IS t_zerodiag_subs; 1660 PetscInt nl; 1661 1662 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1663 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1664 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1665 if (nl) { 1666 PetscBool valid = PETSC_TRUE; 1667 1668 if (pcbddc->current_level) { 1669 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1670 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1671 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1672 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1673 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1674 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1675 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1676 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1677 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1678 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1679 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1680 for (j=0;j<n_interior_dofs;j++) { 1681 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1682 valid = PETSC_FALSE; 1683 break; 1684 } 1685 } 1686 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1687 } 1688 if (valid && pcbddc->NeumannBoundariesLocal) { 1689 IS t_bc; 1690 PetscInt nzb; 1691 1692 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1693 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1694 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1695 if (nzb) valid = PETSC_FALSE; 1696 } 1697 if (valid && pressures) { 1698 IS t_pressure_subs; 1699 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1700 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1701 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1702 } 1703 if (valid) { 1704 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1705 pcbddc->benign_n++; 1706 } else { 1707 recompute_zerodiag = PETSC_TRUE; 1708 } 1709 } 1710 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1711 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1712 } 1713 } else { /* there's just one subdomain (or zero if they have not been detected */ 1714 PetscBool valid = PETSC_TRUE; 1715 1716 if (pcbddc->NeumannBoundariesLocal) { 1717 PetscInt nzb; 1718 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1719 if (nzb) valid = PETSC_FALSE; 1720 } 1721 if (valid && pressures) { 1722 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1723 } 1724 if (valid && pcbddc->current_level) { 1725 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1726 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1727 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1728 for (j=0;j<n_interior_dofs;j++) { 1729 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1730 valid = PETSC_FALSE; 1731 break; 1732 } 1733 } 1734 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1735 } 1736 if (valid) { 1737 pcbddc->benign_n = 1; 1738 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1739 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1740 zerodiag_subs[0] = zerodiag; 1741 } 1742 } 1743 if (pcbddc->current_level) { 1744 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1745 } 1746 } 1747 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1748 1749 if (!pcbddc->benign_n) { 1750 PetscInt n; 1751 1752 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1753 recompute_zerodiag = PETSC_FALSE; 1754 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1755 if (n) { 1756 has_null_pressures = PETSC_FALSE; 1757 have_null = PETSC_FALSE; 1758 } 1759 } 1760 1761 /* final check for null pressures */ 1762 if (zerodiag && pressures) { 1763 PetscInt nz,np; 1764 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1765 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 1766 if (nz != np) have_null = PETSC_FALSE; 1767 } 1768 1769 if (recompute_zerodiag) { 1770 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1771 if (pcbddc->benign_n == 1) { 1772 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 1773 zerodiag = zerodiag_subs[0]; 1774 } else { 1775 PetscInt i,nzn,*new_idxs; 1776 1777 nzn = 0; 1778 for (i=0;i<pcbddc->benign_n;i++) { 1779 PetscInt ns; 1780 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1781 nzn += ns; 1782 } 1783 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 1784 nzn = 0; 1785 for (i=0;i<pcbddc->benign_n;i++) { 1786 PetscInt ns,*idxs; 1787 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1788 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1789 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 1790 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1791 nzn += ns; 1792 } 1793 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 1794 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 1795 } 1796 have_null = PETSC_FALSE; 1797 } 1798 1799 /* Prepare matrix to compute no-net-flux */ 1800 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 1801 Mat A,loc_divudotp; 1802 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 1803 IS row,col,isused = NULL; 1804 PetscInt M,N,n,st,n_isused; 1805 1806 if (pressures) { 1807 isused = pressures; 1808 } else { 1809 isused = zerodiag; 1810 } 1811 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 1812 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1813 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 1814 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 1815 n_isused = 0; 1816 if (isused) { 1817 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 1818 } 1819 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1820 st = st-n_isused; 1821 if (n) { 1822 const PetscInt *gidxs; 1823 1824 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 1825 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1826 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 1827 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1828 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1829 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1830 } else { 1831 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 1832 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1833 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1834 } 1835 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 1836 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 1837 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 1838 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 1839 ierr = ISDestroy(&row);CHKERRQ(ierr); 1840 ierr = ISDestroy(&col);CHKERRQ(ierr); 1841 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 1842 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1843 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1844 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1845 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1846 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1847 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1848 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1849 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1850 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1851 } 1852 1853 /* change of basis and p0 dofs */ 1854 if (has_null_pressures) { 1855 IS zerodiagc; 1856 const PetscInt *idxs,*idxsc; 1857 PetscInt i,s,*nnz; 1858 1859 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1860 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1861 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1862 /* local change of basis for pressures */ 1863 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1864 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1865 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1866 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1867 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1868 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1869 for (i=0;i<pcbddc->benign_n;i++) { 1870 PetscInt nzs,j; 1871 1872 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1873 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1874 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1875 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1876 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1877 } 1878 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1879 ierr = PetscFree(nnz);CHKERRQ(ierr); 1880 /* set identity on velocities */ 1881 for (i=0;i<n-nz;i++) { 1882 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1883 } 1884 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1885 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1886 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1887 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1888 /* set change on pressures */ 1889 for (s=0;s<pcbddc->benign_n;s++) { 1890 PetscScalar *array; 1891 PetscInt nzs; 1892 1893 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1894 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1895 for (i=0;i<nzs-1;i++) { 1896 PetscScalar vals[2]; 1897 PetscInt cols[2]; 1898 1899 cols[0] = idxs[i]; 1900 cols[1] = idxs[nzs-1]; 1901 vals[0] = 1.; 1902 vals[1] = 1.; 1903 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1904 } 1905 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1906 for (i=0;i<nzs-1;i++) array[i] = -1.; 1907 array[nzs-1] = 1.; 1908 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1909 /* store local idxs for p0 */ 1910 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1911 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1912 ierr = PetscFree(array);CHKERRQ(ierr); 1913 } 1914 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1915 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1916 /* project if needed */ 1917 if (pcbddc->benign_change_explicit) { 1918 Mat M; 1919 1920 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1921 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1922 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1923 ierr = MatDestroy(&M);CHKERRQ(ierr); 1924 } 1925 /* store global idxs for p0 */ 1926 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1927 } 1928 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1929 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1930 1931 /* determines if the coarse solver will be singular or not */ 1932 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1933 /* determines if the problem has subdomains with 0 pressure block */ 1934 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1935 *zerodiaglocal = zerodiag; 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1941 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1942 { 1943 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1944 PetscScalar *array; 1945 PetscErrorCode ierr; 1946 1947 PetscFunctionBegin; 1948 if (!pcbddc->benign_sf) { 1949 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1950 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1951 } 1952 if (get) { 1953 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1954 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1955 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1956 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1957 } else { 1958 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1959 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1960 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1961 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1962 } 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1968 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1969 { 1970 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1971 PetscErrorCode ierr; 1972 1973 PetscFunctionBegin; 1974 /* TODO: add error checking 1975 - avoid nested pop (or push) calls. 1976 - cannot push before pop. 1977 - cannot call this if pcbddc->local_mat is NULL 1978 */ 1979 if (!pcbddc->benign_n) { 1980 PetscFunctionReturn(0); 1981 } 1982 if (pop) { 1983 if (pcbddc->benign_change_explicit) { 1984 IS is_p0; 1985 MatReuse reuse; 1986 1987 /* extract B_0 */ 1988 reuse = MAT_INITIAL_MATRIX; 1989 if (pcbddc->benign_B0) { 1990 reuse = MAT_REUSE_MATRIX; 1991 } 1992 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1993 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1994 /* remove rows and cols from local problem */ 1995 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1996 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1997 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1998 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1999 } else { 2000 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2001 PetscScalar *vals; 2002 PetscInt i,n,*idxs_ins; 2003 2004 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2005 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2006 if (!pcbddc->benign_B0) { 2007 PetscInt *nnz; 2008 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2009 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2010 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2011 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2012 for (i=0;i<pcbddc->benign_n;i++) { 2013 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2014 nnz[i] = n - nnz[i]; 2015 } 2016 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2017 ierr = PetscFree(nnz);CHKERRQ(ierr); 2018 } 2019 2020 for (i=0;i<pcbddc->benign_n;i++) { 2021 PetscScalar *array; 2022 PetscInt *idxs,j,nz,cum; 2023 2024 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2025 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2026 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2027 for (j=0;j<nz;j++) vals[j] = 1.; 2028 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2029 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2030 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2031 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2032 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2033 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2034 cum = 0; 2035 for (j=0;j<n;j++) { 2036 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2037 vals[cum] = array[j]; 2038 idxs_ins[cum] = j; 2039 cum++; 2040 } 2041 } 2042 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2043 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2044 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2045 } 2046 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2047 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2048 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2049 } 2050 } else { /* push */ 2051 if (pcbddc->benign_change_explicit) { 2052 PetscInt i; 2053 2054 for (i=0;i<pcbddc->benign_n;i++) { 2055 PetscScalar *B0_vals; 2056 PetscInt *B0_cols,B0_ncol; 2057 2058 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2059 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2060 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2061 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2062 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2063 } 2064 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2065 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 } else { 2067 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2068 } 2069 } 2070 PetscFunctionReturn(0); 2071 } 2072 2073 #undef __FUNCT__ 2074 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2075 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2076 { 2077 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2078 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2079 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2080 PetscBLASInt *B_iwork,*B_ifail; 2081 PetscScalar *work,lwork; 2082 PetscScalar *St,*S,*eigv; 2083 PetscScalar *Sarray,*Starray; 2084 PetscReal *eigs,thresh; 2085 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2086 PetscBool allocated_S_St; 2087 #if defined(PETSC_USE_COMPLEX) 2088 PetscReal *rwork; 2089 #endif 2090 PetscErrorCode ierr; 2091 2092 PetscFunctionBegin; 2093 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2094 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2095 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2096 2097 if (pcbddc->dbg_flag) { 2098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2099 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2100 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2101 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2102 } 2103 2104 if (pcbddc->dbg_flag) { 2105 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2106 } 2107 2108 /* max size of subsets */ 2109 mss = 0; 2110 for (i=0;i<sub_schurs->n_subs;i++) { 2111 PetscInt subset_size; 2112 2113 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2114 mss = PetscMax(mss,subset_size); 2115 } 2116 2117 /* min/max and threshold */ 2118 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2119 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2120 nmax = PetscMax(nmin,nmax); 2121 allocated_S_St = PETSC_FALSE; 2122 if (nmin) { 2123 allocated_S_St = PETSC_TRUE; 2124 } 2125 2126 /* allocate lapack workspace */ 2127 cum = cum2 = 0; 2128 maxneigs = 0; 2129 for (i=0;i<sub_schurs->n_subs;i++) { 2130 PetscInt n,subset_size; 2131 2132 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2133 n = PetscMin(subset_size,nmax); 2134 cum += subset_size; 2135 cum2 += subset_size*n; 2136 maxneigs = PetscMax(maxneigs,n); 2137 } 2138 if (mss) { 2139 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2140 PetscBLASInt B_itype = 1; 2141 PetscBLASInt B_N = mss; 2142 PetscReal zero = 0.0; 2143 PetscReal eps = 0.0; /* dlamch? */ 2144 2145 B_lwork = -1; 2146 S = NULL; 2147 St = NULL; 2148 eigs = NULL; 2149 eigv = NULL; 2150 B_iwork = NULL; 2151 B_ifail = NULL; 2152 #if defined(PETSC_USE_COMPLEX) 2153 rwork = NULL; 2154 #endif 2155 thresh = 1.0; 2156 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2157 #if defined(PETSC_USE_COMPLEX) 2158 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2159 #else 2160 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2161 #endif 2162 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2163 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2164 } else { 2165 /* TODO */ 2166 } 2167 } else { 2168 lwork = 0; 2169 } 2170 2171 nv = 0; 2172 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2173 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2174 } 2175 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2176 if (allocated_S_St) { 2177 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2178 } 2179 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2180 #if defined(PETSC_USE_COMPLEX) 2181 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2182 #endif 2183 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2184 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2185 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2186 nv+cum,&pcbddc->adaptive_constraints_idxs, 2187 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2188 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2189 2190 maxneigs = 0; 2191 cum = cumarray = 0; 2192 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2193 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2194 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2195 const PetscInt *idxs; 2196 2197 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2198 for (cum=0;cum<nv;cum++) { 2199 pcbddc->adaptive_constraints_n[cum] = 1; 2200 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2201 pcbddc->adaptive_constraints_data[cum] = 1.0; 2202 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2203 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2204 } 2205 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2206 } 2207 2208 if (mss) { /* multilevel */ 2209 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2210 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2211 } 2212 2213 thresh = pcbddc->adaptive_threshold; 2214 for (i=0;i<sub_schurs->n_subs;i++) { 2215 const PetscInt *idxs; 2216 PetscReal upper,lower; 2217 PetscInt j,subset_size,eigs_start = 0; 2218 PetscBLASInt B_N; 2219 PetscBool same_data = PETSC_FALSE; 2220 2221 if (pcbddc->use_deluxe_scaling) { 2222 upper = PETSC_MAX_REAL; 2223 lower = thresh; 2224 } else { 2225 upper = 1./thresh; 2226 lower = 0.; 2227 } 2228 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2229 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2230 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2231 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2232 if (sub_schurs->is_hermitian) { 2233 PetscInt j,k; 2234 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2235 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2236 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2237 } 2238 for (j=0;j<subset_size;j++) { 2239 for (k=j;k<subset_size;k++) { 2240 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2241 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2242 } 2243 } 2244 } else { 2245 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2246 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2247 } 2248 } else { 2249 S = Sarray + cumarray; 2250 St = Starray + cumarray; 2251 } 2252 /* see if we can save some work */ 2253 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2254 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2255 } 2256 2257 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2258 B_neigs = 0; 2259 } else { 2260 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2261 PetscBLASInt B_itype = 1; 2262 PetscBLASInt B_IL, B_IU; 2263 PetscReal eps = -1.0; /* dlamch? */ 2264 PetscInt nmin_s; 2265 PetscBool compute_range = PETSC_FALSE; 2266 2267 if (pcbddc->dbg_flag) { 2268 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2269 } 2270 2271 compute_range = PETSC_FALSE; 2272 if (thresh > 1.+PETSC_SMALL && !same_data) { 2273 compute_range = PETSC_TRUE; 2274 } 2275 2276 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2277 if (compute_range) { 2278 2279 /* ask for eigenvalues larger than thresh */ 2280 #if defined(PETSC_USE_COMPLEX) 2281 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2282 #else 2283 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2284 #endif 2285 } else if (!same_data) { 2286 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2287 B_IL = 1; 2288 #if defined(PETSC_USE_COMPLEX) 2289 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2290 #else 2291 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2292 #endif 2293 } else { /* same_data is true, so get the adaptive function requested by the user */ 2294 PetscInt k; 2295 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2296 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2297 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2298 nmin = nmax; 2299 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2300 for (k=0;k<nmax;k++) { 2301 eigs[k] = 1./PETSC_SMALL; 2302 eigv[k*(subset_size+1)] = 1.0; 2303 } 2304 } 2305 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2306 if (B_ierr) { 2307 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2308 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2309 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2310 } 2311 2312 if (B_neigs > nmax) { 2313 if (pcbddc->dbg_flag) { 2314 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2315 } 2316 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2317 B_neigs = nmax; 2318 } 2319 2320 nmin_s = PetscMin(nmin,B_N); 2321 if (B_neigs < nmin_s) { 2322 PetscBLASInt B_neigs2; 2323 2324 if (pcbddc->use_deluxe_scaling) { 2325 B_IL = B_N - nmin_s + 1; 2326 B_IU = B_N - B_neigs; 2327 } else { 2328 B_IL = B_neigs + 1; 2329 B_IU = nmin_s; 2330 } 2331 if (pcbddc->dbg_flag) { 2332 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2333 } 2334 if (sub_schurs->is_hermitian) { 2335 PetscInt j,k; 2336 for (j=0;j<subset_size;j++) { 2337 for (k=j;k<subset_size;k++) { 2338 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2339 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2340 } 2341 } 2342 } else { 2343 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2344 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2345 } 2346 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2347 #if defined(PETSC_USE_COMPLEX) 2348 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2349 #else 2350 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2351 #endif 2352 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2353 B_neigs += B_neigs2; 2354 } 2355 if (B_ierr) { 2356 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2357 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2358 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2359 } 2360 if (pcbddc->dbg_flag) { 2361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2362 for (j=0;j<B_neigs;j++) { 2363 if (eigs[j] == 0.0) { 2364 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2365 } else { 2366 if (pcbddc->use_deluxe_scaling) { 2367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2368 } else { 2369 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2370 } 2371 } 2372 } 2373 } 2374 } else { 2375 /* TODO */ 2376 } 2377 } 2378 /* change the basis back to the original one */ 2379 if (sub_schurs->change) { 2380 Mat change,phi,phit; 2381 2382 if (pcbddc->dbg_flag > 1) { 2383 PetscInt ii; 2384 for (ii=0;ii<B_neigs;ii++) { 2385 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2386 for (j=0;j<B_N;j++) { 2387 #if defined(PETSC_USE_COMPLEX) 2388 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2389 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2390 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2391 #else 2392 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2393 #endif 2394 } 2395 } 2396 } 2397 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2398 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2399 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2400 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2401 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2402 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2403 } 2404 maxneigs = PetscMax(B_neigs,maxneigs); 2405 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2406 if (B_neigs) { 2407 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2408 2409 if (pcbddc->dbg_flag > 1) { 2410 PetscInt ii; 2411 for (ii=0;ii<B_neigs;ii++) { 2412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2413 for (j=0;j<B_N;j++) { 2414 #if defined(PETSC_USE_COMPLEX) 2415 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2416 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2418 #else 2419 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2420 #endif 2421 } 2422 } 2423 } 2424 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2425 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2426 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2427 cum++; 2428 } 2429 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2430 /* shift for next computation */ 2431 cumarray += subset_size*subset_size; 2432 } 2433 if (pcbddc->dbg_flag) { 2434 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2435 } 2436 2437 if (mss) { 2438 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2439 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2440 /* destroy matrices (junk) */ 2441 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2442 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2443 } 2444 if (allocated_S_St) { 2445 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2446 } 2447 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2448 #if defined(PETSC_USE_COMPLEX) 2449 ierr = PetscFree(rwork);CHKERRQ(ierr); 2450 #endif 2451 if (pcbddc->dbg_flag) { 2452 PetscInt maxneigs_r; 2453 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2454 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2455 } 2456 PetscFunctionReturn(0); 2457 } 2458 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "PCBDDCSetUpSolvers" 2461 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2462 { 2463 PetscScalar *coarse_submat_vals; 2464 PetscErrorCode ierr; 2465 2466 PetscFunctionBegin; 2467 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2468 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2469 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2470 2471 /* Setup local neumann solver ksp_R */ 2472 /* PCBDDCSetUpLocalScatters should be called first! */ 2473 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2474 2475 /* 2476 Setup local correction and local part of coarse basis. 2477 Gives back the dense local part of the coarse matrix in column major ordering 2478 */ 2479 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2480 2481 /* Compute total number of coarse nodes and setup coarse solver */ 2482 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2483 2484 /* free */ 2485 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2486 PetscFunctionReturn(0); 2487 } 2488 2489 #undef __FUNCT__ 2490 #define __FUNCT__ "PCBDDCResetCustomization" 2491 PetscErrorCode PCBDDCResetCustomization(PC pc) 2492 { 2493 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2494 PetscErrorCode ierr; 2495 2496 PetscFunctionBegin; 2497 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2498 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2499 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2500 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2501 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2502 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2503 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2504 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2505 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2506 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "PCBDDCResetTopography" 2512 PetscErrorCode PCBDDCResetTopography(PC pc) 2513 { 2514 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2515 PetscInt i; 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2520 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2521 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2522 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2523 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2524 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2525 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2526 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2527 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2528 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2529 pcbddc->graphanalyzed = PETSC_FALSE; 2530 for (i=0;i<pcbddc->n_local_subs;i++) { 2531 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2532 } 2533 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2534 if (pcbddc->sub_schurs) { 2535 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2536 } 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "PCBDDCResetSolvers" 2542 PetscErrorCode PCBDDCResetSolvers(PC pc) 2543 { 2544 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2545 PetscErrorCode ierr; 2546 2547 PetscFunctionBegin; 2548 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2549 if (pcbddc->coarse_phi_B) { 2550 PetscScalar *array; 2551 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2552 ierr = PetscFree(array);CHKERRQ(ierr); 2553 } 2554 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2555 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2556 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2557 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2558 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2559 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2560 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2561 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2562 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2563 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2564 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2565 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2566 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2567 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2568 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2569 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2570 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2571 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2572 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2573 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2574 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2575 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2576 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2577 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2578 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2579 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2580 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2581 if (pcbddc->benign_zerodiag_subs) { 2582 PetscInt i; 2583 for (i=0;i<pcbddc->benign_n;i++) { 2584 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2585 } 2586 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2587 } 2588 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2589 PetscFunctionReturn(0); 2590 } 2591 2592 #undef __FUNCT__ 2593 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2594 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2595 { 2596 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2597 PC_IS *pcis = (PC_IS*)pc->data; 2598 VecType impVecType; 2599 PetscInt n_constraints,n_R,old_size; 2600 PetscErrorCode ierr; 2601 2602 PetscFunctionBegin; 2603 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2604 /* get sizes */ 2605 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2606 n_R = pcis->n - pcbddc->n_vertices; 2607 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2608 /* local work vectors (try to avoid unneeded work)*/ 2609 /* R nodes */ 2610 old_size = -1; 2611 if (pcbddc->vec1_R) { 2612 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2613 } 2614 if (n_R != old_size) { 2615 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2616 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2617 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2618 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2619 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2620 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2621 } 2622 /* local primal dofs */ 2623 old_size = -1; 2624 if (pcbddc->vec1_P) { 2625 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2626 } 2627 if (pcbddc->local_primal_size != old_size) { 2628 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2629 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2630 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2631 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2632 } 2633 /* local explicit constraints */ 2634 old_size = -1; 2635 if (pcbddc->vec1_C) { 2636 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2637 } 2638 if (n_constraints && n_constraints != old_size) { 2639 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2640 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2641 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2642 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2643 } 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "PCBDDCSetUpCorrection" 2649 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2650 { 2651 PetscErrorCode ierr; 2652 /* pointers to pcis and pcbddc */ 2653 PC_IS* pcis = (PC_IS*)pc->data; 2654 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2655 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2656 /* submatrices of local problem */ 2657 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2658 /* submatrices of local coarse problem */ 2659 Mat S_VV,S_CV,S_VC,S_CC; 2660 /* working matrices */ 2661 Mat C_CR; 2662 /* additional working stuff */ 2663 PC pc_R; 2664 Mat F; 2665 Vec dummy_vec; 2666 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2667 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2668 PetscScalar *work; 2669 PetscInt *idx_V_B; 2670 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2671 PetscInt i,n_R,n_D,n_B; 2672 2673 /* some shortcuts to scalars */ 2674 PetscScalar one=1.0,m_one=-1.0; 2675 2676 PetscFunctionBegin; 2677 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2678 2679 /* Set Non-overlapping dimensions */ 2680 n_vertices = pcbddc->n_vertices; 2681 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2682 n_B = pcis->n_B; 2683 n_D = pcis->n - n_B; 2684 n_R = pcis->n - n_vertices; 2685 2686 /* vertices in boundary numbering */ 2687 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2688 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2689 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2690 2691 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2692 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2693 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2694 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2695 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2696 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2697 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2698 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2699 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2700 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2701 2702 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2703 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2704 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2705 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2706 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2707 lda_rhs = n_R; 2708 need_benign_correction = PETSC_FALSE; 2709 if (isLU || isILU || isCHOL) { 2710 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2711 } else if (sub_schurs && sub_schurs->reuse_solver) { 2712 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2713 MatFactorType type; 2714 2715 F = reuse_solver->F; 2716 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2717 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2718 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2719 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2720 } else { 2721 F = NULL; 2722 } 2723 2724 /* allocate workspace */ 2725 n = 0; 2726 if (n_constraints) { 2727 n += lda_rhs*n_constraints; 2728 } 2729 if (n_vertices) { 2730 n = PetscMax(2*lda_rhs*n_vertices,n); 2731 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2732 } 2733 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2734 2735 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2736 dummy_vec = NULL; 2737 if (need_benign_correction && lda_rhs != n_R && F) { 2738 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2739 } 2740 2741 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2742 if (n_constraints) { 2743 Mat M1,M2,M3,C_B; 2744 IS is_aux; 2745 PetscScalar *array,*array2; 2746 2747 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2748 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2749 2750 /* Extract constraints on R nodes: C_{CR} */ 2751 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2752 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2753 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2754 2755 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2756 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2757 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2758 for (i=0;i<n_constraints;i++) { 2759 const PetscScalar *row_cmat_values; 2760 const PetscInt *row_cmat_indices; 2761 PetscInt size_of_constraint,j; 2762 2763 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2764 for (j=0;j<size_of_constraint;j++) { 2765 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 2766 } 2767 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2768 } 2769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 2770 if (F) { 2771 Mat B; 2772 2773 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2774 if (need_benign_correction) { 2775 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2776 2777 /* rhs is already zero on interior dofs, no need to change the rhs */ 2778 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 2779 } 2780 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 2781 if (need_benign_correction) { 2782 PetscScalar *marr; 2783 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2784 2785 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2786 if (lda_rhs != n_R) { 2787 for (i=0;i<n_constraints;i++) { 2788 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2789 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2790 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2791 } 2792 } else { 2793 for (i=0;i<n_constraints;i++) { 2794 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2795 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2796 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2797 } 2798 } 2799 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2800 } 2801 ierr = MatDestroy(&B);CHKERRQ(ierr); 2802 } else { 2803 PetscScalar *marr; 2804 2805 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2806 for (i=0;i<n_constraints;i++) { 2807 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2808 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 2809 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2810 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2811 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2812 } 2813 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2814 } 2815 if (!pcbddc->switch_static) { 2816 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2817 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2818 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2819 for (i=0;i<n_constraints;i++) { 2820 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 2821 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 2822 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2823 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2824 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2825 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2826 } 2827 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2828 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2829 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2830 } else { 2831 if (lda_rhs != n_R) { 2832 IS dummy; 2833 2834 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 2835 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2836 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 2837 } else { 2838 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 2839 pcbddc->local_auxmat2 = local_auxmat2_R; 2840 } 2841 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2842 } 2843 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2844 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2845 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 2846 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 2847 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 2848 if (isCHOL) { 2849 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2850 } else { 2851 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2852 } 2853 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2854 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2855 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2856 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2857 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2858 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2859 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2860 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2861 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2862 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2863 } 2864 2865 /* Get submatrices from subdomain matrix */ 2866 if (n_vertices) { 2867 IS is_aux; 2868 2869 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2870 IS tis; 2871 2872 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2873 ierr = ISSort(tis);CHKERRQ(ierr); 2874 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2875 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2876 } else { 2877 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2878 } 2879 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2880 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2881 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2882 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2883 } 2884 2885 /* Matrix of coarse basis functions (local) */ 2886 if (pcbddc->coarse_phi_B) { 2887 PetscInt on_B,on_primal,on_D=n_D; 2888 if (pcbddc->coarse_phi_D) { 2889 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2890 } 2891 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2892 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2893 PetscScalar *marray; 2894 2895 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2896 ierr = PetscFree(marray);CHKERRQ(ierr); 2897 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2898 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2899 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2900 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2901 } 2902 } 2903 2904 if (!pcbddc->coarse_phi_B) { 2905 PetscScalar *marray; 2906 2907 n = n_B*pcbddc->local_primal_size; 2908 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2909 n += n_D*pcbddc->local_primal_size; 2910 } 2911 if (!pcbddc->symmetric_primal) { 2912 n *= 2; 2913 } 2914 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2915 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2916 n = n_B*pcbddc->local_primal_size; 2917 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2918 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2919 n += n_D*pcbddc->local_primal_size; 2920 } 2921 if (!pcbddc->symmetric_primal) { 2922 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2923 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2924 n = n_B*pcbddc->local_primal_size; 2925 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2926 } 2927 } else { 2928 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2929 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2930 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2931 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2932 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2933 } 2934 } 2935 } 2936 2937 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2938 p0_lidx_I = NULL; 2939 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2940 const PetscInt *idxs; 2941 2942 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2943 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2944 for (i=0;i<pcbddc->benign_n;i++) { 2945 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2946 } 2947 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2948 } 2949 2950 /* vertices */ 2951 if (n_vertices) { 2952 2953 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2954 2955 if (n_R) { 2956 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2957 PetscBLASInt B_N,B_one = 1; 2958 PetscScalar *x,*y; 2959 PetscBool isseqaij; 2960 2961 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2962 if (need_benign_correction) { 2963 ISLocalToGlobalMapping RtoN; 2964 IS is_p0; 2965 PetscInt *idxs_p0,n; 2966 2967 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2968 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2969 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2970 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2971 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2972 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2973 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2974 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2975 } 2976 2977 if (lda_rhs == n_R) { 2978 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2979 } else { 2980 PetscScalar *av,*array; 2981 const PetscInt *xadj,*adjncy; 2982 PetscInt n; 2983 PetscBool flg_row; 2984 2985 array = work+lda_rhs*n_vertices; 2986 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2987 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2988 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2989 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2990 for (i=0;i<n;i++) { 2991 PetscInt j; 2992 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2993 } 2994 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2995 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2996 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2997 } 2998 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2999 if (need_benign_correction) { 3000 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3001 PetscScalar *marr; 3002 3003 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3004 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3005 3006 | 0 0 0 | (V) 3007 L = | 0 0 -1 | (P-p0) 3008 | 0 0 -1 | (p0) 3009 3010 */ 3011 for (i=0;i<reuse_solver->benign_n;i++) { 3012 const PetscScalar *vals; 3013 const PetscInt *idxs,*idxs_zero; 3014 PetscInt n,j,nz; 3015 3016 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3017 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3018 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3019 for (j=0;j<n;j++) { 3020 PetscScalar val = vals[j]; 3021 PetscInt k,col = idxs[j]; 3022 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3023 } 3024 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3025 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3026 } 3027 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3028 } 3029 if (F) { 3030 /* need to correct the rhs */ 3031 if (need_benign_correction) { 3032 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3033 PetscScalar *marr; 3034 3035 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3036 if (lda_rhs != n_R) { 3037 for (i=0;i<n_vertices;i++) { 3038 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3039 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3040 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3041 } 3042 } else { 3043 for (i=0;i<n_vertices;i++) { 3044 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3045 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3046 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3047 } 3048 } 3049 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3050 } 3051 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3052 /* need to correct the solution */ 3053 if (need_benign_correction) { 3054 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3055 PetscScalar *marr; 3056 3057 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3058 if (lda_rhs != n_R) { 3059 for (i=0;i<n_vertices;i++) { 3060 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3061 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3062 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3063 } 3064 } else { 3065 for (i=0;i<n_vertices;i++) { 3066 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3067 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3068 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3069 } 3070 } 3071 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3072 } 3073 } else { 3074 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3075 for (i=0;i<n_vertices;i++) { 3076 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3077 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3078 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3079 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3080 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3081 } 3082 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3083 } 3084 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3085 /* S_VV and S_CV */ 3086 if (n_constraints) { 3087 Mat B; 3088 3089 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3090 for (i=0;i<n_vertices;i++) { 3091 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3092 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3093 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3094 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3095 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3096 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3097 } 3098 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3099 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3100 ierr = MatDestroy(&B);CHKERRQ(ierr); 3101 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3102 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3103 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3104 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3105 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3106 ierr = MatDestroy(&B);CHKERRQ(ierr); 3107 } 3108 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3109 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3110 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3111 } 3112 if (lda_rhs != n_R) { 3113 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3114 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3115 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3116 } 3117 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3118 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3119 if (need_benign_correction) { 3120 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3121 PetscScalar *marr,*sums; 3122 3123 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3124 ierr = MatDenseGetArray(S_VVt,&marr); 3125 for (i=0;i<reuse_solver->benign_n;i++) { 3126 const PetscScalar *vals; 3127 const PetscInt *idxs,*idxs_zero; 3128 PetscInt n,j,nz; 3129 3130 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3131 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3132 for (j=0;j<n_vertices;j++) { 3133 PetscInt k; 3134 sums[j] = 0.; 3135 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3136 } 3137 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3138 for (j=0;j<n;j++) { 3139 PetscScalar val = vals[j]; 3140 PetscInt k; 3141 for (k=0;k<n_vertices;k++) { 3142 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3143 } 3144 } 3145 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3146 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3147 } 3148 ierr = PetscFree(sums);CHKERRQ(ierr); 3149 ierr = MatDenseRestoreArray(S_VVt,&marr); 3150 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3151 } 3152 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3153 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3154 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3155 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3156 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3157 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3158 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3159 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3160 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3161 } else { 3162 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3163 } 3164 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3165 3166 /* coarse basis functions */ 3167 for (i=0;i<n_vertices;i++) { 3168 PetscScalar *y; 3169 3170 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3171 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3172 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3173 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3174 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3175 y[n_B*i+idx_V_B[i]] = 1.0; 3176 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3177 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3178 3179 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3180 PetscInt j; 3181 3182 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3183 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3184 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3185 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3186 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3187 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3188 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3189 } 3190 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3191 } 3192 /* if n_R == 0 the object is not destroyed */ 3193 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3194 } 3195 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3196 3197 if (n_constraints) { 3198 Mat B; 3199 3200 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3201 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3202 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3203 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3204 if (n_vertices) { 3205 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3206 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3207 } else { 3208 Mat S_VCt; 3209 3210 if (lda_rhs != n_R) { 3211 ierr = MatDestroy(&B);CHKERRQ(ierr); 3212 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3213 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3214 } 3215 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3216 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3217 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3218 } 3219 } 3220 ierr = MatDestroy(&B);CHKERRQ(ierr); 3221 /* coarse basis functions */ 3222 for (i=0;i<n_constraints;i++) { 3223 PetscScalar *y; 3224 3225 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3226 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3227 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3228 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3229 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3230 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3231 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3232 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3233 PetscInt j; 3234 3235 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3236 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3237 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3238 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3239 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3240 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3241 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3242 } 3243 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3244 } 3245 } 3246 if (n_constraints) { 3247 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3248 } 3249 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3250 3251 /* coarse matrix entries relative to B_0 */ 3252 if (pcbddc->benign_n) { 3253 Mat B0_B,B0_BPHI; 3254 IS is_dummy; 3255 PetscScalar *data; 3256 PetscInt j; 3257 3258 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3259 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3260 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3261 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3262 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3263 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3264 for (j=0;j<pcbddc->benign_n;j++) { 3265 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3266 for (i=0;i<pcbddc->local_primal_size;i++) { 3267 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3268 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3269 } 3270 } 3271 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3272 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3273 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3274 } 3275 3276 /* compute other basis functions for non-symmetric problems */ 3277 if (!pcbddc->symmetric_primal) { 3278 Mat B_V=NULL,B_C=NULL; 3279 PetscScalar *marray; 3280 3281 if (n_constraints) { 3282 Mat S_CCT,C_CRT; 3283 3284 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3285 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3286 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3287 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3288 if (n_vertices) { 3289 Mat S_VCT; 3290 3291 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3292 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3293 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3294 } 3295 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3296 } else { 3297 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3298 } 3299 if (n_vertices && n_R) { 3300 PetscScalar *av,*marray; 3301 const PetscInt *xadj,*adjncy; 3302 PetscInt n; 3303 PetscBool flg_row; 3304 3305 /* B_V = B_V - A_VR^T */ 3306 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3307 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3308 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3309 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3310 for (i=0;i<n;i++) { 3311 PetscInt j; 3312 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3313 } 3314 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3315 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3316 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3317 } 3318 3319 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3320 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3321 for (i=0;i<n_vertices;i++) { 3322 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3323 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3324 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3325 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3326 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3327 } 3328 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3329 if (B_C) { 3330 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3331 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3332 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3333 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3334 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3335 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3336 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3337 } 3338 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3339 } 3340 /* coarse basis functions */ 3341 for (i=0;i<pcbddc->local_primal_size;i++) { 3342 PetscScalar *y; 3343 3344 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3345 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3346 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3347 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3348 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3349 if (i<n_vertices) { 3350 y[n_B*i+idx_V_B[i]] = 1.0; 3351 } 3352 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3353 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3354 3355 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3356 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3357 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3358 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3359 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3360 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3361 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3362 } 3363 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3364 } 3365 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3366 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3367 } 3368 /* free memory */ 3369 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3370 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3371 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3372 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3373 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3374 ierr = PetscFree(work);CHKERRQ(ierr); 3375 if (n_vertices) { 3376 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3377 } 3378 if (n_constraints) { 3379 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3380 } 3381 /* Checking coarse_sub_mat and coarse basis functios */ 3382 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3383 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3384 if (pcbddc->dbg_flag) { 3385 Mat coarse_sub_mat; 3386 Mat AUXMAT,TM1,TM2,TM3,TM4; 3387 Mat coarse_phi_D,coarse_phi_B; 3388 Mat coarse_psi_D,coarse_psi_B; 3389 Mat A_II,A_BB,A_IB,A_BI; 3390 Mat C_B,CPHI; 3391 IS is_dummy; 3392 Vec mones; 3393 MatType checkmattype=MATSEQAIJ; 3394 PetscReal real_value; 3395 3396 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3397 Mat A; 3398 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3399 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3400 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3401 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3402 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3403 ierr = MatDestroy(&A);CHKERRQ(ierr); 3404 } else { 3405 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3406 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3407 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3408 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3409 } 3410 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3411 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3412 if (!pcbddc->symmetric_primal) { 3413 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3414 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3415 } 3416 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3417 3418 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3419 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3420 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3421 if (!pcbddc->symmetric_primal) { 3422 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3423 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3424 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3425 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3426 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3427 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3428 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3429 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3430 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3431 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3432 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3433 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3434 } else { 3435 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3436 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3437 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3438 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3439 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3440 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3441 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3442 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3443 } 3444 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3445 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3446 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3447 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3448 if (pcbddc->benign_n) { 3449 Mat B0_B,B0_BPHI; 3450 PetscScalar *data,*data2; 3451 PetscInt j; 3452 3453 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3454 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3455 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3456 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3457 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3458 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3459 for (j=0;j<pcbddc->benign_n;j++) { 3460 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3461 for (i=0;i<pcbddc->local_primal_size;i++) { 3462 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3463 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3464 } 3465 } 3466 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3467 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3468 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3469 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3470 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3471 } 3472 #if 0 3473 { 3474 PetscViewer viewer; 3475 char filename[256]; 3476 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3477 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3478 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3479 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3480 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3481 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3482 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3483 if (save_change) { 3484 Mat phi_B; 3485 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3486 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3487 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3488 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3489 } else { 3490 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3491 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3492 } 3493 if (pcbddc->coarse_phi_D) { 3494 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3495 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3496 } 3497 if (pcbddc->coarse_psi_B) { 3498 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3499 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3500 } 3501 if (pcbddc->coarse_psi_D) { 3502 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3503 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3504 } 3505 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3506 } 3507 #endif 3508 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3509 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3510 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3511 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3512 3513 /* check constraints */ 3514 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3515 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3516 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3517 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3518 } else { 3519 PetscScalar *data; 3520 Mat tmat; 3521 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3522 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3523 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3524 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3525 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3526 } 3527 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3528 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3529 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3530 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3531 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3532 if (!pcbddc->symmetric_primal) { 3533 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3534 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3535 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3536 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3537 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3538 } 3539 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3540 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3541 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3542 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3543 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3544 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3545 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3546 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3547 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3548 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3549 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3550 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3551 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3552 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3553 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3554 if (!pcbddc->symmetric_primal) { 3555 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3556 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3557 } 3558 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3559 } 3560 /* get back data */ 3561 *coarse_submat_vals_n = coarse_submat_vals; 3562 PetscFunctionReturn(0); 3563 } 3564 3565 #undef __FUNCT__ 3566 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3567 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3568 { 3569 Mat *work_mat; 3570 IS isrow_s,iscol_s; 3571 PetscBool rsorted,csorted; 3572 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3573 PetscErrorCode ierr; 3574 3575 PetscFunctionBegin; 3576 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3577 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3578 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3579 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3580 3581 if (!rsorted) { 3582 const PetscInt *idxs; 3583 PetscInt *idxs_sorted,i; 3584 3585 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3586 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3587 for (i=0;i<rsize;i++) { 3588 idxs_perm_r[i] = i; 3589 } 3590 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3591 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3592 for (i=0;i<rsize;i++) { 3593 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3594 } 3595 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3596 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3597 } else { 3598 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3599 isrow_s = isrow; 3600 } 3601 3602 if (!csorted) { 3603 if (isrow == iscol) { 3604 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3605 iscol_s = isrow_s; 3606 } else { 3607 const PetscInt *idxs; 3608 PetscInt *idxs_sorted,i; 3609 3610 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3611 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3612 for (i=0;i<csize;i++) { 3613 idxs_perm_c[i] = i; 3614 } 3615 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3616 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3617 for (i=0;i<csize;i++) { 3618 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3619 } 3620 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3621 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3622 } 3623 } else { 3624 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3625 iscol_s = iscol; 3626 } 3627 3628 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3629 3630 if (!rsorted || !csorted) { 3631 Mat new_mat; 3632 IS is_perm_r,is_perm_c; 3633 3634 if (!rsorted) { 3635 PetscInt *idxs_r,i; 3636 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3637 for (i=0;i<rsize;i++) { 3638 idxs_r[idxs_perm_r[i]] = i; 3639 } 3640 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3641 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3642 } else { 3643 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3644 } 3645 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3646 3647 if (!csorted) { 3648 if (isrow_s == iscol_s) { 3649 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3650 is_perm_c = is_perm_r; 3651 } else { 3652 PetscInt *idxs_c,i; 3653 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3654 for (i=0;i<csize;i++) { 3655 idxs_c[idxs_perm_c[i]] = i; 3656 } 3657 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3658 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3659 } 3660 } else { 3661 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3662 } 3663 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3664 3665 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3666 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3667 work_mat[0] = new_mat; 3668 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3669 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3670 } 3671 3672 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3673 *B = work_mat[0]; 3674 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3675 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3676 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3677 PetscFunctionReturn(0); 3678 } 3679 3680 #undef __FUNCT__ 3681 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3682 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3683 { 3684 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3685 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3686 Mat new_mat; 3687 IS is_local,is_global; 3688 PetscInt local_size; 3689 PetscBool isseqaij; 3690 PetscErrorCode ierr; 3691 3692 PetscFunctionBegin; 3693 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3694 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3695 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3696 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3697 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3698 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3699 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3700 3701 /* check */ 3702 if (pcbddc->dbg_flag) { 3703 Vec x,x_change; 3704 PetscReal error; 3705 3706 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3707 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3708 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3709 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3710 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3711 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3712 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3713 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3714 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3715 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3716 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3717 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3718 ierr = VecDestroy(&x);CHKERRQ(ierr); 3719 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3720 } 3721 3722 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3723 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3724 if (isseqaij) { 3725 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3726 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3727 } else { 3728 Mat work_mat; 3729 3730 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3731 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3732 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3733 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3734 } 3735 if (matis->A->symmetric_set) { 3736 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3737 #if !defined(PETSC_USE_COMPLEX) 3738 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3739 #endif 3740 } 3741 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3742 PetscFunctionReturn(0); 3743 } 3744 3745 #undef __FUNCT__ 3746 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3747 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3748 { 3749 PC_IS* pcis = (PC_IS*)(pc->data); 3750 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3751 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3752 PetscInt *idx_R_local=NULL; 3753 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3754 PetscInt vbs,bs; 3755 PetscBT bitmask=NULL; 3756 PetscErrorCode ierr; 3757 3758 PetscFunctionBegin; 3759 /* 3760 No need to setup local scatters if 3761 - primal space is unchanged 3762 AND 3763 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 3764 AND 3765 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 3766 */ 3767 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 3768 PetscFunctionReturn(0); 3769 } 3770 /* destroy old objects */ 3771 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3772 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3773 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3774 /* Set Non-overlapping dimensions */ 3775 n_B = pcis->n_B; 3776 n_D = pcis->n - n_B; 3777 n_vertices = pcbddc->n_vertices; 3778 3779 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 3780 3781 /* create auxiliary bitmask and allocate workspace */ 3782 if (!sub_schurs || !sub_schurs->reuse_solver) { 3783 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 3784 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 3785 for (i=0;i<n_vertices;i++) { 3786 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 3787 } 3788 3789 for (i=0, n_R=0; i<pcis->n; i++) { 3790 if (!PetscBTLookup(bitmask,i)) { 3791 idx_R_local[n_R++] = i; 3792 } 3793 } 3794 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 3795 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3796 3797 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3798 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 3799 } 3800 3801 /* Block code */ 3802 vbs = 1; 3803 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 3804 if (bs>1 && !(n_vertices%bs)) { 3805 PetscBool is_blocked = PETSC_TRUE; 3806 PetscInt *vary; 3807 if (!sub_schurs || !sub_schurs->reuse_solver) { 3808 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 3809 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 3810 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 3811 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 3812 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 3813 for (i=0; i<pcis->n/bs; i++) { 3814 if (vary[i]!=0 && vary[i]!=bs) { 3815 is_blocked = PETSC_FALSE; 3816 break; 3817 } 3818 } 3819 ierr = PetscFree(vary);CHKERRQ(ierr); 3820 } else { 3821 /* Verify directly the R set */ 3822 for (i=0; i<n_R/bs; i++) { 3823 PetscInt j,node=idx_R_local[bs*i]; 3824 for (j=1; j<bs; j++) { 3825 if (node != idx_R_local[bs*i+j]-j) { 3826 is_blocked = PETSC_FALSE; 3827 break; 3828 } 3829 } 3830 } 3831 } 3832 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 3833 vbs = bs; 3834 for (i=0;i<n_R/vbs;i++) { 3835 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 3836 } 3837 } 3838 } 3839 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 3840 if (sub_schurs && sub_schurs->reuse_solver) { 3841 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3842 3843 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3844 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 3845 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 3846 reuse_solver->is_R = pcbddc->is_R_local; 3847 } else { 3848 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3849 } 3850 3851 /* print some info if requested */ 3852 if (pcbddc->dbg_flag) { 3853 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3854 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3855 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3856 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3857 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3858 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3859 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3860 } 3861 3862 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3863 if (!sub_schurs || !sub_schurs->reuse_solver) { 3864 IS is_aux1,is_aux2; 3865 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3866 3867 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3868 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3869 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3870 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3871 for (i=0; i<n_D; i++) { 3872 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3873 } 3874 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3875 for (i=0, j=0; i<n_R; i++) { 3876 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3877 aux_array1[j++] = i; 3878 } 3879 } 3880 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3881 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3882 for (i=0, j=0; i<n_B; i++) { 3883 if (!PetscBTLookup(bitmask,is_indices[i])) { 3884 aux_array2[j++] = i; 3885 } 3886 } 3887 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3888 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3889 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3890 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3891 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3892 3893 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3894 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3895 for (i=0, j=0; i<n_R; i++) { 3896 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3897 aux_array1[j++] = i; 3898 } 3899 } 3900 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3901 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3902 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3903 } 3904 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3905 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3906 } else { 3907 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3908 IS tis; 3909 PetscInt schur_size; 3910 3911 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3912 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3913 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3914 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3915 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3916 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3917 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3918 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3919 } 3920 } 3921 PetscFunctionReturn(0); 3922 } 3923 3924 3925 #undef __FUNCT__ 3926 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3927 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3928 { 3929 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3930 PC_IS *pcis = (PC_IS*)pc->data; 3931 PC pc_temp; 3932 Mat A_RR; 3933 MatReuse reuse; 3934 PetscScalar m_one = -1.0; 3935 PetscReal value; 3936 PetscInt n_D,n_R; 3937 PetscBool check_corr[2],issbaij; 3938 PetscErrorCode ierr; 3939 /* prefixes stuff */ 3940 char dir_prefix[256],neu_prefix[256],str_level[16]; 3941 size_t len; 3942 3943 PetscFunctionBegin; 3944 3945 /* compute prefixes */ 3946 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3947 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3948 if (!pcbddc->current_level) { 3949 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3950 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3951 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3952 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3953 } else { 3954 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3955 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3956 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3957 len -= 15; /* remove "pc_bddc_coarse_" */ 3958 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3959 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3960 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3961 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3962 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3963 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3964 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3965 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3966 } 3967 3968 /* DIRICHLET PROBLEM */ 3969 if (dirichlet) { 3970 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3971 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3972 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3973 if (pcbddc->dbg_flag) { 3974 Mat A_IIn; 3975 3976 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3977 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3978 pcis->A_II = A_IIn; 3979 } 3980 } 3981 if (pcbddc->local_mat->symmetric_set) { 3982 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3983 } 3984 /* Matrix for Dirichlet problem is pcis->A_II */ 3985 n_D = pcis->n - pcis->n_B; 3986 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3987 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3988 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3989 /* default */ 3990 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3991 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3992 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3993 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3994 if (issbaij) { 3995 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3996 } else { 3997 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3998 } 3999 /* Allow user's customization */ 4000 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4001 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4002 } 4003 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4004 if (sub_schurs && sub_schurs->reuse_solver) { 4005 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4006 4007 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4008 } 4009 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4010 if (!n_D) { 4011 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4012 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4013 } 4014 /* Set Up KSP for Dirichlet problem of BDDC */ 4015 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4016 /* set ksp_D into pcis data */ 4017 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4018 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4019 pcis->ksp_D = pcbddc->ksp_D; 4020 } 4021 4022 /* NEUMANN PROBLEM */ 4023 A_RR = 0; 4024 if (neumann) { 4025 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4026 PetscInt ibs,mbs; 4027 PetscBool issbaij; 4028 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4029 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4030 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4031 if (pcbddc->ksp_R) { /* already created ksp */ 4032 PetscInt nn_R; 4033 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4034 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4035 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4036 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4037 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4038 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4039 reuse = MAT_INITIAL_MATRIX; 4040 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4041 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4042 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4043 reuse = MAT_INITIAL_MATRIX; 4044 } else { /* safe to reuse the matrix */ 4045 reuse = MAT_REUSE_MATRIX; 4046 } 4047 } 4048 /* last check */ 4049 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4050 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4051 reuse = MAT_INITIAL_MATRIX; 4052 } 4053 } else { /* first time, so we need to create the matrix */ 4054 reuse = MAT_INITIAL_MATRIX; 4055 } 4056 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4057 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4058 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4059 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4060 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4061 if (matis->A == pcbddc->local_mat) { 4062 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4063 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4064 } else { 4065 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4066 } 4067 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4068 if (matis->A == pcbddc->local_mat) { 4069 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4070 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4071 } else { 4072 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4073 } 4074 } 4075 /* extract A_RR */ 4076 if (sub_schurs && sub_schurs->reuse_solver) { 4077 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4078 4079 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4080 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4081 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4082 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4083 } else { 4084 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4085 } 4086 } else { 4087 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4088 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4089 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4090 } 4091 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4092 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4093 } 4094 if (pcbddc->local_mat->symmetric_set) { 4095 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4096 } 4097 if (!pcbddc->ksp_R) { /* create object if not present */ 4098 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4099 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4100 /* default */ 4101 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4102 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4103 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4104 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4105 if (issbaij) { 4106 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4107 } else { 4108 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4109 } 4110 /* Allow user's customization */ 4111 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4112 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4113 } 4114 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4115 if (!n_R) { 4116 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4117 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4118 } 4119 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4120 /* Reuse solver if it is present */ 4121 if (sub_schurs && sub_schurs->reuse_solver) { 4122 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4123 4124 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4125 } 4126 /* Set Up KSP for Neumann problem of BDDC */ 4127 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4128 } 4129 4130 if (pcbddc->dbg_flag) { 4131 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4132 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4134 } 4135 4136 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4137 check_corr[0] = check_corr[1] = PETSC_FALSE; 4138 if (pcbddc->NullSpace_corr[0]) { 4139 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4140 } 4141 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4142 check_corr[0] = PETSC_TRUE; 4143 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4144 } 4145 if (neumann && pcbddc->NullSpace_corr[2]) { 4146 check_corr[1] = PETSC_TRUE; 4147 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4148 } 4149 4150 /* check Dirichlet and Neumann solvers */ 4151 if (pcbddc->dbg_flag) { 4152 if (dirichlet) { /* Dirichlet */ 4153 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4154 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4155 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4156 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4157 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4159 if (check_corr[0]) { 4160 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4161 } 4162 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4163 } 4164 if (neumann) { /* Neumann */ 4165 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4166 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4167 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4168 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4169 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4170 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4171 if (check_corr[1]) { 4172 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4173 } 4174 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4175 } 4176 } 4177 /* free Neumann problem's matrix */ 4178 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4179 PetscFunctionReturn(0); 4180 } 4181 4182 #undef __FUNCT__ 4183 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4184 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4185 { 4186 PetscErrorCode ierr; 4187 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4188 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4189 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4190 4191 PetscFunctionBegin; 4192 if (!reuse_solver) { 4193 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4194 } 4195 if (!pcbddc->switch_static) { 4196 if (applytranspose && pcbddc->local_auxmat1) { 4197 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4198 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4199 } 4200 if (!reuse_solver) { 4201 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4202 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4203 } else { 4204 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4205 4206 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4207 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4208 } 4209 } else { 4210 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4211 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4212 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4213 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4214 if (applytranspose && pcbddc->local_auxmat1) { 4215 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4216 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4217 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4218 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4219 } 4220 } 4221 if (!reuse_solver || pcbddc->switch_static) { 4222 if (applytranspose) { 4223 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4224 } else { 4225 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4226 } 4227 } else { 4228 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4229 4230 if (applytranspose) { 4231 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4232 } else { 4233 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4234 } 4235 } 4236 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4237 if (!pcbddc->switch_static) { 4238 if (!reuse_solver) { 4239 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4240 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4241 } else { 4242 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4243 4244 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4245 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4246 } 4247 if (!applytranspose && pcbddc->local_auxmat1) { 4248 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4249 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4250 } 4251 } else { 4252 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4253 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4254 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4255 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4256 if (!applytranspose && pcbddc->local_auxmat1) { 4257 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4258 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4259 } 4260 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4261 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4262 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4263 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4264 } 4265 PetscFunctionReturn(0); 4266 } 4267 4268 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4269 #undef __FUNCT__ 4270 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4271 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4272 { 4273 PetscErrorCode ierr; 4274 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4275 PC_IS* pcis = (PC_IS*) (pc->data); 4276 const PetscScalar zero = 0.0; 4277 4278 PetscFunctionBegin; 4279 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4280 if (!pcbddc->benign_apply_coarse_only) { 4281 if (applytranspose) { 4282 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4283 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4284 } else { 4285 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4286 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4287 } 4288 } else { 4289 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4290 } 4291 4292 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4293 if (pcbddc->benign_n) { 4294 PetscScalar *array; 4295 PetscInt j; 4296 4297 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4298 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4299 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4300 } 4301 4302 /* start communications from local primal nodes to rhs of coarse solver */ 4303 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4304 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4305 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4306 4307 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4308 if (pcbddc->coarse_ksp) { 4309 Mat coarse_mat; 4310 Vec rhs,sol; 4311 MatNullSpace nullsp; 4312 PetscBool isbddc = PETSC_FALSE; 4313 4314 if (pcbddc->benign_have_null) { 4315 PC coarse_pc; 4316 4317 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4318 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4319 /* we need to propagate to coarser levels the need for a possible benign correction */ 4320 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4321 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4322 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4323 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4324 } 4325 } 4326 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4327 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4328 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4329 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4330 if (nullsp) { 4331 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4332 } 4333 if (applytranspose) { 4334 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4335 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4336 } else { 4337 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4338 PC coarse_pc; 4339 4340 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4341 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4342 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4343 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4344 } else { 4345 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4346 } 4347 } 4348 /* we don't need the benign correction at coarser levels anymore */ 4349 if (pcbddc->benign_have_null && isbddc) { 4350 PC coarse_pc; 4351 PC_BDDC* coarsepcbddc; 4352 4353 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4354 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4355 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4356 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4357 } 4358 if (nullsp) { 4359 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4360 } 4361 } 4362 4363 /* Local solution on R nodes */ 4364 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4365 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4366 } 4367 /* communications from coarse sol to local primal nodes */ 4368 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4369 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4370 4371 /* Sum contributions from the two levels */ 4372 if (!pcbddc->benign_apply_coarse_only) { 4373 if (applytranspose) { 4374 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4375 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4376 } else { 4377 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4378 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4379 } 4380 /* store p0 */ 4381 if (pcbddc->benign_n) { 4382 PetscScalar *array; 4383 PetscInt j; 4384 4385 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4386 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4387 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4388 } 4389 } else { /* expand the coarse solution */ 4390 if (applytranspose) { 4391 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4392 } else { 4393 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4394 } 4395 } 4396 PetscFunctionReturn(0); 4397 } 4398 4399 #undef __FUNCT__ 4400 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4401 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4402 { 4403 PetscErrorCode ierr; 4404 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4405 PetscScalar *array; 4406 Vec from,to; 4407 4408 PetscFunctionBegin; 4409 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4410 from = pcbddc->coarse_vec; 4411 to = pcbddc->vec1_P; 4412 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4413 Vec tvec; 4414 4415 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4416 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4417 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4418 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4419 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4420 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4421 } 4422 } else { /* from local to global -> put data in coarse right hand side */ 4423 from = pcbddc->vec1_P; 4424 to = pcbddc->coarse_vec; 4425 } 4426 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4427 PetscFunctionReturn(0); 4428 } 4429 4430 #undef __FUNCT__ 4431 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4432 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4433 { 4434 PetscErrorCode ierr; 4435 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4436 PetscScalar *array; 4437 Vec from,to; 4438 4439 PetscFunctionBegin; 4440 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4441 from = pcbddc->coarse_vec; 4442 to = pcbddc->vec1_P; 4443 } else { /* from local to global -> put data in coarse right hand side */ 4444 from = pcbddc->vec1_P; 4445 to = pcbddc->coarse_vec; 4446 } 4447 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4448 if (smode == SCATTER_FORWARD) { 4449 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4450 Vec tvec; 4451 4452 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4453 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4454 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4455 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4456 } 4457 } else { 4458 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4459 ierr = VecResetArray(from);CHKERRQ(ierr); 4460 } 4461 } 4462 PetscFunctionReturn(0); 4463 } 4464 4465 /* uncomment for testing purposes */ 4466 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4467 #undef __FUNCT__ 4468 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4469 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4470 { 4471 PetscErrorCode ierr; 4472 PC_IS* pcis = (PC_IS*)(pc->data); 4473 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4474 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4475 /* one and zero */ 4476 PetscScalar one=1.0,zero=0.0; 4477 /* space to store constraints and their local indices */ 4478 PetscScalar *constraints_data; 4479 PetscInt *constraints_idxs,*constraints_idxs_B; 4480 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4481 PetscInt *constraints_n; 4482 /* iterators */ 4483 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4484 /* BLAS integers */ 4485 PetscBLASInt lwork,lierr; 4486 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4487 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4488 /* reuse */ 4489 PetscInt olocal_primal_size,olocal_primal_size_cc; 4490 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4491 /* change of basis */ 4492 PetscBool qr_needed; 4493 PetscBT change_basis,qr_needed_idx; 4494 /* auxiliary stuff */ 4495 PetscInt *nnz,*is_indices; 4496 PetscInt ncc; 4497 /* some quantities */ 4498 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4499 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4500 4501 PetscFunctionBegin; 4502 /* Destroy Mat objects computed previously */ 4503 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4504 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4505 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4506 /* save info on constraints from previous setup (if any) */ 4507 olocal_primal_size = pcbddc->local_primal_size; 4508 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4509 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4510 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4511 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4512 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4513 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4514 4515 if (!pcbddc->adaptive_selection) { 4516 IS ISForVertices,*ISForFaces,*ISForEdges; 4517 MatNullSpace nearnullsp; 4518 const Vec *nearnullvecs; 4519 Vec *localnearnullsp; 4520 PetscScalar *array; 4521 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4522 PetscBool nnsp_has_cnst; 4523 /* LAPACK working arrays for SVD or POD */ 4524 PetscBool skip_lapack,boolforchange; 4525 PetscScalar *work; 4526 PetscReal *singular_vals; 4527 #if defined(PETSC_USE_COMPLEX) 4528 PetscReal *rwork; 4529 #endif 4530 #if defined(PETSC_MISSING_LAPACK_GESVD) 4531 PetscScalar *temp_basis,*correlation_mat; 4532 #else 4533 PetscBLASInt dummy_int=1; 4534 PetscScalar dummy_scalar=1.; 4535 #endif 4536 4537 /* Get index sets for faces, edges and vertices from graph */ 4538 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4539 /* print some info */ 4540 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4541 PetscInt nv; 4542 4543 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4544 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4545 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4548 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4549 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4550 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4551 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4552 } 4553 4554 /* free unneeded index sets */ 4555 if (!pcbddc->use_vertices) { 4556 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4557 } 4558 if (!pcbddc->use_edges) { 4559 for (i=0;i<n_ISForEdges;i++) { 4560 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4561 } 4562 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4563 n_ISForEdges = 0; 4564 } 4565 if (!pcbddc->use_faces) { 4566 for (i=0;i<n_ISForFaces;i++) { 4567 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4568 } 4569 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4570 n_ISForFaces = 0; 4571 } 4572 4573 /* check if near null space is attached to global mat */ 4574 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4575 if (nearnullsp) { 4576 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4577 /* remove any stored info */ 4578 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4579 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4580 /* store information for BDDC solver reuse */ 4581 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4582 pcbddc->onearnullspace = nearnullsp; 4583 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4584 for (i=0;i<nnsp_size;i++) { 4585 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4586 } 4587 } else { /* if near null space is not provided BDDC uses constants by default */ 4588 nnsp_size = 0; 4589 nnsp_has_cnst = PETSC_TRUE; 4590 } 4591 /* get max number of constraints on a single cc */ 4592 max_constraints = nnsp_size; 4593 if (nnsp_has_cnst) max_constraints++; 4594 4595 /* 4596 Evaluate maximum storage size needed by the procedure 4597 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4598 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4599 There can be multiple constraints per connected component 4600 */ 4601 n_vertices = 0; 4602 if (ISForVertices) { 4603 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4604 } 4605 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4606 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4607 4608 total_counts = n_ISForFaces+n_ISForEdges; 4609 total_counts *= max_constraints; 4610 total_counts += n_vertices; 4611 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4612 4613 total_counts = 0; 4614 max_size_of_constraint = 0; 4615 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4616 IS used_is; 4617 if (i<n_ISForEdges) { 4618 used_is = ISForEdges[i]; 4619 } else { 4620 used_is = ISForFaces[i-n_ISForEdges]; 4621 } 4622 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4623 total_counts += j; 4624 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4625 } 4626 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4627 4628 /* get local part of global near null space vectors */ 4629 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4630 for (k=0;k<nnsp_size;k++) { 4631 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4632 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4633 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4634 } 4635 4636 /* whether or not to skip lapack calls */ 4637 skip_lapack = PETSC_TRUE; 4638 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4639 4640 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4641 if (!skip_lapack) { 4642 PetscScalar temp_work; 4643 4644 #if defined(PETSC_MISSING_LAPACK_GESVD) 4645 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4646 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4647 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4648 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4649 #if defined(PETSC_USE_COMPLEX) 4650 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4651 #endif 4652 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4653 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4654 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4655 lwork = -1; 4656 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4657 #if !defined(PETSC_USE_COMPLEX) 4658 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4659 #else 4660 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4661 #endif 4662 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4663 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4664 #else /* on missing GESVD */ 4665 /* SVD */ 4666 PetscInt max_n,min_n; 4667 max_n = max_size_of_constraint; 4668 min_n = max_constraints; 4669 if (max_size_of_constraint < max_constraints) { 4670 min_n = max_size_of_constraint; 4671 max_n = max_constraints; 4672 } 4673 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4674 #if defined(PETSC_USE_COMPLEX) 4675 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4676 #endif 4677 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4678 lwork = -1; 4679 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4680 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4681 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4682 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4683 #if !defined(PETSC_USE_COMPLEX) 4684 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4685 #else 4686 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4687 #endif 4688 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4689 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4690 #endif /* on missing GESVD */ 4691 /* Allocate optimal workspace */ 4692 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4693 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4694 } 4695 /* Now we can loop on constraining sets */ 4696 total_counts = 0; 4697 constraints_idxs_ptr[0] = 0; 4698 constraints_data_ptr[0] = 0; 4699 /* vertices */ 4700 if (n_vertices) { 4701 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4702 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4703 for (i=0;i<n_vertices;i++) { 4704 constraints_n[total_counts] = 1; 4705 constraints_data[total_counts] = 1.0; 4706 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4707 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4708 total_counts++; 4709 } 4710 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4711 n_vertices = total_counts; 4712 } 4713 4714 /* edges and faces */ 4715 total_counts_cc = total_counts; 4716 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4717 IS used_is; 4718 PetscBool idxs_copied = PETSC_FALSE; 4719 4720 if (ncc<n_ISForEdges) { 4721 used_is = ISForEdges[ncc]; 4722 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4723 } else { 4724 used_is = ISForFaces[ncc-n_ISForEdges]; 4725 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4726 } 4727 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4728 4729 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4730 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4731 /* change of basis should not be performed on local periodic nodes */ 4732 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4733 if (nnsp_has_cnst) { 4734 PetscScalar quad_value; 4735 4736 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4737 idxs_copied = PETSC_TRUE; 4738 4739 if (!pcbddc->use_nnsp_true) { 4740 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4741 } else { 4742 quad_value = 1.0; 4743 } 4744 for (j=0;j<size_of_constraint;j++) { 4745 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4746 } 4747 temp_constraints++; 4748 total_counts++; 4749 } 4750 for (k=0;k<nnsp_size;k++) { 4751 PetscReal real_value; 4752 PetscScalar *ptr_to_data; 4753 4754 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4755 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4756 for (j=0;j<size_of_constraint;j++) { 4757 ptr_to_data[j] = array[is_indices[j]]; 4758 } 4759 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4760 /* check if array is null on the connected component */ 4761 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4762 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 4763 if (real_value > 0.0) { /* keep indices and values */ 4764 temp_constraints++; 4765 total_counts++; 4766 if (!idxs_copied) { 4767 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4768 idxs_copied = PETSC_TRUE; 4769 } 4770 } 4771 } 4772 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4773 valid_constraints = temp_constraints; 4774 if (!pcbddc->use_nnsp_true && temp_constraints) { 4775 if (temp_constraints == 1) { /* just normalize the constraint */ 4776 PetscScalar norm,*ptr_to_data; 4777 4778 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4780 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 4781 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 4782 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 4783 } else { /* perform SVD */ 4784 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 4785 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4786 4787 #if defined(PETSC_MISSING_LAPACK_GESVD) 4788 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 4789 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 4790 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 4791 the constraints basis will differ (by a complex factor with absolute value equal to 1) 4792 from that computed using LAPACKgesvd 4793 -> This is due to a different computation of eigenvectors in LAPACKheev 4794 -> The quality of the POD-computed basis will be the same */ 4795 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 4796 /* Store upper triangular part of correlation matrix */ 4797 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4798 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4799 for (j=0;j<temp_constraints;j++) { 4800 for (k=0;k<j+1;k++) { 4801 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 4802 } 4803 } 4804 /* compute eigenvalues and eigenvectors of correlation matrix */ 4805 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4806 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 4807 #if !defined(PETSC_USE_COMPLEX) 4808 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 4809 #else 4810 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 4811 #endif 4812 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4813 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 4814 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 4815 j = 0; 4816 while (j < temp_constraints && singular_vals[j] < tol) j++; 4817 total_counts = total_counts-j; 4818 valid_constraints = temp_constraints-j; 4819 /* scale and copy POD basis into used quadrature memory */ 4820 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4821 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4822 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 4823 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4824 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 4825 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4826 if (j<temp_constraints) { 4827 PetscInt ii; 4828 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 4829 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4830 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 4831 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4832 for (k=0;k<temp_constraints-j;k++) { 4833 for (ii=0;ii<size_of_constraint;ii++) { 4834 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 4835 } 4836 } 4837 } 4838 #else /* on missing GESVD */ 4839 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4840 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4841 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4842 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4843 #if !defined(PETSC_USE_COMPLEX) 4844 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4845 #else 4846 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4847 #endif 4848 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4849 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4850 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4851 k = temp_constraints; 4852 if (k > size_of_constraint) k = size_of_constraint; 4853 j = 0; 4854 while (j < k && singular_vals[k-j-1] < tol) j++; 4855 valid_constraints = k-j; 4856 total_counts = total_counts-temp_constraints+valid_constraints; 4857 #endif /* on missing GESVD */ 4858 } 4859 } 4860 /* update pointers information */ 4861 if (valid_constraints) { 4862 constraints_n[total_counts_cc] = valid_constraints; 4863 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4864 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4865 /* set change_of_basis flag */ 4866 if (boolforchange) { 4867 PetscBTSet(change_basis,total_counts_cc); 4868 } 4869 total_counts_cc++; 4870 } 4871 } 4872 /* free workspace */ 4873 if (!skip_lapack) { 4874 ierr = PetscFree(work);CHKERRQ(ierr); 4875 #if defined(PETSC_USE_COMPLEX) 4876 ierr = PetscFree(rwork);CHKERRQ(ierr); 4877 #endif 4878 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4879 #if defined(PETSC_MISSING_LAPACK_GESVD) 4880 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4881 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4882 #endif 4883 } 4884 for (k=0;k<nnsp_size;k++) { 4885 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4886 } 4887 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4888 /* free index sets of faces, edges and vertices */ 4889 for (i=0;i<n_ISForFaces;i++) { 4890 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4891 } 4892 if (n_ISForFaces) { 4893 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4894 } 4895 for (i=0;i<n_ISForEdges;i++) { 4896 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4897 } 4898 if (n_ISForEdges) { 4899 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4900 } 4901 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4902 } else { 4903 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4904 4905 total_counts = 0; 4906 n_vertices = 0; 4907 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4908 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4909 } 4910 max_constraints = 0; 4911 total_counts_cc = 0; 4912 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4913 total_counts += pcbddc->adaptive_constraints_n[i]; 4914 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4915 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4916 } 4917 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4918 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4919 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4920 constraints_data = pcbddc->adaptive_constraints_data; 4921 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4922 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4923 total_counts_cc = 0; 4924 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4925 if (pcbddc->adaptive_constraints_n[i]) { 4926 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4927 } 4928 } 4929 #if 0 4930 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4931 for (i=0;i<total_counts_cc;i++) { 4932 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4933 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4934 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4935 printf(" %d",constraints_idxs[j]); 4936 } 4937 printf("\n"); 4938 printf("number of cc: %d\n",constraints_n[i]); 4939 } 4940 for (i=0;i<n_vertices;i++) { 4941 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4942 } 4943 for (i=0;i<sub_schurs->n_subs;i++) { 4944 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4945 } 4946 #endif 4947 4948 max_size_of_constraint = 0; 4949 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4950 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4951 /* Change of basis */ 4952 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4953 if (pcbddc->use_change_of_basis) { 4954 for (i=0;i<sub_schurs->n_subs;i++) { 4955 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4956 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4957 } 4958 } 4959 } 4960 } 4961 pcbddc->local_primal_size = total_counts; 4962 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4963 4964 /* map constraints_idxs in boundary numbering */ 4965 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4966 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4967 4968 /* Create constraint matrix */ 4969 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4970 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4971 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4972 4973 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4974 /* determine if a QR strategy is needed for change of basis */ 4975 qr_needed = PETSC_FALSE; 4976 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4977 total_primal_vertices=0; 4978 pcbddc->local_primal_size_cc = 0; 4979 for (i=0;i<total_counts_cc;i++) { 4980 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4981 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4982 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4983 pcbddc->local_primal_size_cc += 1; 4984 } else if (PetscBTLookup(change_basis,i)) { 4985 for (k=0;k<constraints_n[i];k++) { 4986 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4987 } 4988 pcbddc->local_primal_size_cc += constraints_n[i]; 4989 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4990 PetscBTSet(qr_needed_idx,i); 4991 qr_needed = PETSC_TRUE; 4992 } 4993 } else { 4994 pcbddc->local_primal_size_cc += 1; 4995 } 4996 } 4997 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4998 pcbddc->n_vertices = total_primal_vertices; 4999 /* permute indices in order to have a sorted set of vertices */ 5000 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5001 5002 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5003 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5004 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5005 5006 /* nonzero structure of constraint matrix */ 5007 /* and get reference dof for local constraints */ 5008 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5009 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5010 5011 j = total_primal_vertices; 5012 total_counts = total_primal_vertices; 5013 cum = total_primal_vertices; 5014 for (i=n_vertices;i<total_counts_cc;i++) { 5015 if (!PetscBTLookup(change_basis,i)) { 5016 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5017 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5018 cum++; 5019 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5020 for (k=0;k<constraints_n[i];k++) { 5021 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5022 nnz[j+k] = size_of_constraint; 5023 } 5024 j += constraints_n[i]; 5025 } 5026 } 5027 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5028 ierr = PetscFree(nnz);CHKERRQ(ierr); 5029 5030 /* set values in constraint matrix */ 5031 for (i=0;i<total_primal_vertices;i++) { 5032 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5033 } 5034 total_counts = total_primal_vertices; 5035 for (i=n_vertices;i<total_counts_cc;i++) { 5036 if (!PetscBTLookup(change_basis,i)) { 5037 PetscInt *cols; 5038 5039 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5040 cols = constraints_idxs+constraints_idxs_ptr[i]; 5041 for (k=0;k<constraints_n[i];k++) { 5042 PetscInt row = total_counts+k; 5043 PetscScalar *vals; 5044 5045 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5046 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5047 } 5048 total_counts += constraints_n[i]; 5049 } 5050 } 5051 /* assembling */ 5052 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5053 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5054 5055 /* 5056 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5057 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5058 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5059 */ 5060 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5061 if (pcbddc->use_change_of_basis) { 5062 /* dual and primal dofs on a single cc */ 5063 PetscInt dual_dofs,primal_dofs; 5064 /* working stuff for GEQRF */ 5065 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5066 PetscBLASInt lqr_work; 5067 /* working stuff for UNGQR */ 5068 PetscScalar *gqr_work,lgqr_work_t; 5069 PetscBLASInt lgqr_work; 5070 /* working stuff for TRTRS */ 5071 PetscScalar *trs_rhs; 5072 PetscBLASInt Blas_NRHS; 5073 /* pointers for values insertion into change of basis matrix */ 5074 PetscInt *start_rows,*start_cols; 5075 PetscScalar *start_vals; 5076 /* working stuff for values insertion */ 5077 PetscBT is_primal; 5078 PetscInt *aux_primal_numbering_B; 5079 /* matrix sizes */ 5080 PetscInt global_size,local_size; 5081 /* temporary change of basis */ 5082 Mat localChangeOfBasisMatrix; 5083 /* extra space for debugging */ 5084 PetscScalar *dbg_work; 5085 5086 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5087 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5088 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5089 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5090 /* nonzeros for local mat */ 5091 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5092 if (!pcbddc->benign_change || pcbddc->fake_change) { 5093 for (i=0;i<pcis->n;i++) nnz[i]=1; 5094 } else { 5095 const PetscInt *ii; 5096 PetscInt n; 5097 PetscBool flg_row; 5098 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5099 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5100 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5101 } 5102 for (i=n_vertices;i<total_counts_cc;i++) { 5103 if (PetscBTLookup(change_basis,i)) { 5104 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5105 if (PetscBTLookup(qr_needed_idx,i)) { 5106 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5107 } else { 5108 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5109 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5110 } 5111 } 5112 } 5113 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5114 ierr = PetscFree(nnz);CHKERRQ(ierr); 5115 /* Set interior change in the matrix */ 5116 if (!pcbddc->benign_change || pcbddc->fake_change) { 5117 for (i=0;i<pcis->n;i++) { 5118 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5119 } 5120 } else { 5121 const PetscInt *ii,*jj; 5122 PetscScalar *aa; 5123 PetscInt n; 5124 PetscBool flg_row; 5125 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5126 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5127 for (i=0;i<n;i++) { 5128 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5129 } 5130 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5131 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5132 } 5133 5134 if (pcbddc->dbg_flag) { 5135 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5136 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5137 } 5138 5139 5140 /* Now we loop on the constraints which need a change of basis */ 5141 /* 5142 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5143 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5144 5145 Basic blocks of change of basis matrix T computed by 5146 5147 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5148 5149 | 1 0 ... 0 s_1/S | 5150 | 0 1 ... 0 s_2/S | 5151 | ... | 5152 | 0 ... 1 s_{n-1}/S | 5153 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5154 5155 with S = \sum_{i=1}^n s_i^2 5156 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5157 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5158 5159 - QR decomposition of constraints otherwise 5160 */ 5161 if (qr_needed) { 5162 /* space to store Q */ 5163 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5164 /* array to store scaling factors for reflectors */ 5165 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5166 /* first we issue queries for optimal work */ 5167 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5168 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5169 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5170 lqr_work = -1; 5171 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5172 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5173 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5174 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5175 lgqr_work = -1; 5176 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5177 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5178 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5179 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5180 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5181 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5182 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5183 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5184 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5185 /* array to store rhs and solution of triangular solver */ 5186 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5187 /* allocating workspace for check */ 5188 if (pcbddc->dbg_flag) { 5189 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5190 } 5191 } 5192 /* array to store whether a node is primal or not */ 5193 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5194 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5195 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5196 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5197 for (i=0;i<total_primal_vertices;i++) { 5198 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5199 } 5200 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5201 5202 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5203 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5204 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5205 if (PetscBTLookup(change_basis,total_counts)) { 5206 /* get constraint info */ 5207 primal_dofs = constraints_n[total_counts]; 5208 dual_dofs = size_of_constraint-primal_dofs; 5209 5210 if (pcbddc->dbg_flag) { 5211 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5212 } 5213 5214 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5215 5216 /* copy quadrature constraints for change of basis check */ 5217 if (pcbddc->dbg_flag) { 5218 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5219 } 5220 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5221 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5222 5223 /* compute QR decomposition of constraints */ 5224 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5225 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5226 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5227 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5228 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5229 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5230 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5231 5232 /* explictly compute R^-T */ 5233 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5234 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5235 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5236 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5237 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5238 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5239 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5240 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5241 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5242 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5243 5244 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5245 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5246 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5247 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5248 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5249 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5250 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5251 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5252 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5253 5254 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5255 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5256 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5257 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5258 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5259 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5260 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5261 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5262 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5263 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5264 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5265 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5266 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5267 5268 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5269 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5270 /* insert cols for primal dofs */ 5271 for (j=0;j<primal_dofs;j++) { 5272 start_vals = &qr_basis[j*size_of_constraint]; 5273 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5274 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5275 } 5276 /* insert cols for dual dofs */ 5277 for (j=0,k=0;j<dual_dofs;k++) { 5278 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5279 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5280 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5281 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5282 j++; 5283 } 5284 } 5285 5286 /* check change of basis */ 5287 if (pcbddc->dbg_flag) { 5288 PetscInt ii,jj; 5289 PetscBool valid_qr=PETSC_TRUE; 5290 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5291 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5292 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5293 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5294 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5295 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5296 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5297 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5298 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5299 for (jj=0;jj<size_of_constraint;jj++) { 5300 for (ii=0;ii<primal_dofs;ii++) { 5301 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5302 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5303 } 5304 } 5305 if (!valid_qr) { 5306 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5307 for (jj=0;jj<size_of_constraint;jj++) { 5308 for (ii=0;ii<primal_dofs;ii++) { 5309 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5310 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5311 } 5312 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5313 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5314 } 5315 } 5316 } 5317 } else { 5318 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5319 } 5320 } 5321 } else { /* simple transformation block */ 5322 PetscInt row,col; 5323 PetscScalar val,norm; 5324 5325 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5326 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5327 for (j=0;j<size_of_constraint;j++) { 5328 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5329 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5330 if (!PetscBTLookup(is_primal,row_B)) { 5331 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5332 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5333 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5334 } else { 5335 for (k=0;k<size_of_constraint;k++) { 5336 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5337 if (row != col) { 5338 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5339 } else { 5340 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5341 } 5342 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5343 } 5344 } 5345 } 5346 if (pcbddc->dbg_flag) { 5347 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5348 } 5349 } 5350 } else { 5351 if (pcbddc->dbg_flag) { 5352 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5353 } 5354 } 5355 } 5356 5357 /* free workspace */ 5358 if (qr_needed) { 5359 if (pcbddc->dbg_flag) { 5360 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5361 } 5362 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5363 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5364 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5365 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5366 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5367 } 5368 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5369 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5370 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5371 5372 /* assembling of global change of variable */ 5373 if (!pcbddc->fake_change) { 5374 Mat tmat; 5375 PetscInt bs; 5376 5377 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5378 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5379 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5380 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5381 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5382 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5383 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5384 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5385 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5386 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5387 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5388 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5389 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5390 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5391 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5392 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5393 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5394 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5395 5396 /* check */ 5397 if (pcbddc->dbg_flag) { 5398 PetscReal error; 5399 Vec x,x_change; 5400 5401 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5402 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5403 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5404 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5405 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5406 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5407 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5408 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5409 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5410 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5411 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5412 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5414 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5415 ierr = VecDestroy(&x);CHKERRQ(ierr); 5416 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5417 } 5418 /* adapt sub_schurs computed (if any) */ 5419 if (pcbddc->use_deluxe_scaling) { 5420 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5421 5422 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5423 if (sub_schurs && sub_schurs->S_Ej_all) { 5424 Mat S_new,tmat; 5425 IS is_all_N,is_V_Sall = NULL; 5426 5427 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5428 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5429 if (pcbddc->deluxe_zerorows) { 5430 ISLocalToGlobalMapping NtoSall; 5431 IS is_V; 5432 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5433 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5434 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5435 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5436 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5437 } 5438 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5439 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5440 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5441 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5442 if (pcbddc->deluxe_zerorows) { 5443 const PetscScalar *array; 5444 const PetscInt *idxs_V,*idxs_all; 5445 PetscInt i,n_V; 5446 5447 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5448 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5449 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5450 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5451 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5452 for (i=0;i<n_V;i++) { 5453 PetscScalar val; 5454 PetscInt idx; 5455 5456 idx = idxs_V[i]; 5457 val = array[idxs_all[idxs_V[i]]]; 5458 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5459 } 5460 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5461 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5462 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5463 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5464 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5465 } 5466 sub_schurs->S_Ej_all = S_new; 5467 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5468 if (sub_schurs->sum_S_Ej_all) { 5469 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5470 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5471 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5472 if (pcbddc->deluxe_zerorows) { 5473 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5474 } 5475 sub_schurs->sum_S_Ej_all = S_new; 5476 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5477 } 5478 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5479 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5480 } 5481 /* destroy any change of basis context in sub_schurs */ 5482 if (sub_schurs && sub_schurs->change) { 5483 PetscInt i; 5484 5485 for (i=0;i<sub_schurs->n_subs;i++) { 5486 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5487 } 5488 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5489 } 5490 } 5491 if (pcbddc->switch_static) { /* need to save the local change */ 5492 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5493 } else { 5494 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5495 } 5496 /* determine if any process has changed the pressures locally */ 5497 pcbddc->change_interior = pcbddc->benign_have_null; 5498 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5499 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5500 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5501 pcbddc->use_qr_single = qr_needed; 5502 } 5503 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5504 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5505 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5506 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5507 } else { 5508 Mat benign_global = NULL; 5509 if (pcbddc->benign_have_null) { 5510 Mat tmat; 5511 5512 pcbddc->change_interior = PETSC_TRUE; 5513 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5514 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5515 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5516 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5517 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5518 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5519 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5520 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5521 if (pcbddc->benign_change) { 5522 Mat M; 5523 5524 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5525 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5526 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5527 ierr = MatDestroy(&M);CHKERRQ(ierr); 5528 } else { 5529 Mat eye; 5530 PetscScalar *array; 5531 5532 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5533 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5534 for (i=0;i<pcis->n;i++) { 5535 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5536 } 5537 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5538 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5539 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5540 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5541 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5542 } 5543 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5544 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5545 } 5546 if (pcbddc->user_ChangeOfBasisMatrix) { 5547 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5548 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5549 } else if (pcbddc->benign_have_null) { 5550 pcbddc->ChangeOfBasisMatrix = benign_global; 5551 } 5552 } 5553 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5554 IS is_global; 5555 const PetscInt *gidxs; 5556 5557 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5558 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5559 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5560 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5561 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5562 } 5563 } 5564 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5565 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5566 } 5567 5568 if (!pcbddc->fake_change) { 5569 /* add pressure dofs to set of primal nodes for numbering purposes */ 5570 for (i=0;i<pcbddc->benign_n;i++) { 5571 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5572 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5573 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5574 pcbddc->local_primal_size_cc++; 5575 pcbddc->local_primal_size++; 5576 } 5577 5578 /* check if a new primal space has been introduced (also take into account benign trick) */ 5579 pcbddc->new_primal_space_local = PETSC_TRUE; 5580 if (olocal_primal_size == pcbddc->local_primal_size) { 5581 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5582 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5583 if (!pcbddc->new_primal_space_local) { 5584 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5585 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5586 } 5587 } 5588 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5589 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5590 } 5591 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5592 5593 /* flush dbg viewer */ 5594 if (pcbddc->dbg_flag) { 5595 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5596 } 5597 5598 /* free workspace */ 5599 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5600 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5601 if (!pcbddc->adaptive_selection) { 5602 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5603 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5604 } else { 5605 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5606 pcbddc->adaptive_constraints_idxs_ptr, 5607 pcbddc->adaptive_constraints_data_ptr, 5608 pcbddc->adaptive_constraints_idxs, 5609 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5610 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5611 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5612 } 5613 PetscFunctionReturn(0); 5614 } 5615 5616 #undef __FUNCT__ 5617 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5618 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5619 { 5620 ISLocalToGlobalMapping map; 5621 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5622 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5623 PetscInt ierr,i,N; 5624 5625 PetscFunctionBegin; 5626 if (pcbddc->graphanalyzed && !pcbddc->recompute_topography) PetscFunctionReturn(0); 5627 /* Reset previously computed graph */ 5628 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5629 /* Init local Graph struct */ 5630 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5631 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5632 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5633 5634 /* Check validity of the csr graph passed in by the user */ 5635 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5636 5637 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5638 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5639 PetscInt *xadj,*adjncy; 5640 PetscInt nvtxs; 5641 PetscBool flg_row=PETSC_FALSE; 5642 5643 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5644 if (flg_row) { 5645 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5646 pcbddc->computed_rowadj = PETSC_TRUE; 5647 } 5648 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5649 } 5650 if (pcbddc->dbg_flag) { 5651 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5652 } 5653 5654 /* Setup of Graph */ 5655 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5656 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5657 5658 /* attach info on disconnected subdomains if present */ 5659 if (pcbddc->n_local_subs) { 5660 PetscInt *local_subs; 5661 5662 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5663 for (i=0;i<pcbddc->n_local_subs;i++) { 5664 const PetscInt *idxs; 5665 PetscInt nl,j; 5666 5667 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5668 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5669 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5670 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5671 } 5672 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5673 pcbddc->mat_graph->local_subs = local_subs; 5674 } 5675 5676 /* Graph's connected components analysis */ 5677 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5678 5679 /* set flag indicating analysis has been done */ 5680 pcbddc->graphanalyzed = PETSC_TRUE; 5681 PetscFunctionReturn(0); 5682 } 5683 5684 #undef __FUNCT__ 5685 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5686 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5687 { 5688 PetscInt i,j; 5689 PetscScalar *alphas; 5690 PetscErrorCode ierr; 5691 5692 PetscFunctionBegin; 5693 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5694 for (i=0;i<n;i++) { 5695 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5696 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5697 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5698 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5699 } 5700 ierr = PetscFree(alphas);CHKERRQ(ierr); 5701 PetscFunctionReturn(0); 5702 } 5703 5704 #undef __FUNCT__ 5705 #define __FUNCT__ "MatISGetSubassemblingPattern" 5706 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5707 { 5708 Mat A; 5709 PetscInt n_neighs,*neighs,*n_shared,**shared; 5710 PetscMPIInt size,rank,color; 5711 PetscInt *xadj,*adjncy; 5712 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5713 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5714 PetscInt void_procs,*procs_candidates = NULL; 5715 PetscInt xadj_count, *count; 5716 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5717 PetscSubcomm psubcomm; 5718 MPI_Comm subcomm; 5719 PetscErrorCode ierr; 5720 5721 PetscFunctionBegin; 5722 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5723 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5724 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5725 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5726 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5727 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5728 5729 if (have_void) *have_void = PETSC_FALSE; 5730 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5731 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5732 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5733 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5734 im_active = !!(n); 5735 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5736 void_procs = size - active_procs; 5737 /* get ranks of of non-active processes in mat communicator */ 5738 if (void_procs) { 5739 PetscInt ncand; 5740 5741 if (have_void) *have_void = PETSC_TRUE; 5742 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5743 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5744 for (i=0,ncand=0;i<size;i++) { 5745 if (!procs_candidates[i]) { 5746 procs_candidates[ncand++] = i; 5747 } 5748 } 5749 /* force n_subdomains to be not greater that the number of non-active processes */ 5750 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5751 } 5752 5753 /* number of subdomains requested greater than active processes -> just shift the matrix 5754 number of subdomains requested 1 -> send to master or first candidate in voids */ 5755 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5756 PetscInt issize,isidx,dest; 5757 if (*n_subdomains == 1) dest = 0; 5758 else dest = rank; 5759 if (im_active) { 5760 issize = 1; 5761 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5762 isidx = procs_candidates[dest]; 5763 } else { 5764 isidx = dest; 5765 } 5766 } else { 5767 issize = 0; 5768 isidx = -1; 5769 } 5770 *n_subdomains = active_procs; 5771 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5772 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5773 PetscFunctionReturn(0); 5774 } 5775 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5776 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5777 threshold = PetscMax(threshold,2); 5778 5779 /* Get info on mapping */ 5780 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5781 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5782 5783 /* build local CSR graph of subdomains' connectivity */ 5784 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5785 xadj[0] = 0; 5786 xadj[1] = PetscMax(n_neighs-1,0); 5787 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5788 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5789 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5790 for (i=1;i<n_neighs;i++) 5791 for (j=0;j<n_shared[i];j++) 5792 count[shared[i][j]] += 1; 5793 5794 xadj_count = 0; 5795 for (i=1;i<n_neighs;i++) { 5796 for (j=0;j<n_shared[i];j++) { 5797 if (count[shared[i][j]] < threshold) { 5798 adjncy[xadj_count] = neighs[i]; 5799 adjncy_wgt[xadj_count] = n_shared[i]; 5800 xadj_count++; 5801 break; 5802 } 5803 } 5804 } 5805 xadj[1] = xadj_count; 5806 ierr = PetscFree(count);CHKERRQ(ierr); 5807 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5808 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5809 5810 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5811 5812 /* Restrict work on active processes only */ 5813 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5814 if (void_procs) { 5815 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5816 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5817 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5818 subcomm = PetscSubcommChild(psubcomm); 5819 } else { 5820 psubcomm = NULL; 5821 subcomm = PetscObjectComm((PetscObject)mat); 5822 } 5823 5824 v_wgt = NULL; 5825 if (!color) { 5826 ierr = PetscFree(xadj);CHKERRQ(ierr); 5827 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5828 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5829 } else { 5830 Mat subdomain_adj; 5831 IS new_ranks,new_ranks_contig; 5832 MatPartitioning partitioner; 5833 PetscInt rstart=0,rend=0; 5834 PetscInt *is_indices,*oldranks; 5835 PetscMPIInt size; 5836 PetscBool aggregate; 5837 5838 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5839 if (void_procs) { 5840 PetscInt prank = rank; 5841 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5842 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5843 for (i=0;i<xadj[1];i++) { 5844 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5845 } 5846 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5847 } else { 5848 oldranks = NULL; 5849 } 5850 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5851 if (aggregate) { /* TODO: all this part could be made more efficient */ 5852 PetscInt lrows,row,ncols,*cols; 5853 PetscMPIInt nrank; 5854 PetscScalar *vals; 5855 5856 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5857 lrows = 0; 5858 if (nrank<redprocs) { 5859 lrows = size/redprocs; 5860 if (nrank<size%redprocs) lrows++; 5861 } 5862 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5863 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5864 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5865 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5866 row = nrank; 5867 ncols = xadj[1]-xadj[0]; 5868 cols = adjncy; 5869 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5870 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5871 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5872 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5873 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5874 ierr = PetscFree(xadj);CHKERRQ(ierr); 5875 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5876 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5877 ierr = PetscFree(vals);CHKERRQ(ierr); 5878 if (use_vwgt) { 5879 Vec v; 5880 const PetscScalar *array; 5881 PetscInt nl; 5882 5883 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5884 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5885 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5886 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5887 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5888 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5889 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5890 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5891 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5892 ierr = VecDestroy(&v);CHKERRQ(ierr); 5893 } 5894 } else { 5895 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5896 if (use_vwgt) { 5897 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5898 v_wgt[0] = local_size; 5899 } 5900 } 5901 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5902 5903 /* Partition */ 5904 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5905 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5906 if (v_wgt) { 5907 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5908 } 5909 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5910 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5911 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5912 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5913 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5914 5915 /* renumber new_ranks to avoid "holes" in new set of processors */ 5916 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5917 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5918 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5919 if (!aggregate) { 5920 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5921 #if defined(PETSC_USE_DEBUG) 5922 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5923 #endif 5924 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5925 } else if (oldranks) { 5926 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5927 } else { 5928 ranks_send_to_idx[0] = is_indices[0]; 5929 } 5930 } else { 5931 PetscInt idxs[1]; 5932 PetscMPIInt tag; 5933 MPI_Request *reqs; 5934 5935 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5936 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5937 for (i=rstart;i<rend;i++) { 5938 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5939 } 5940 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5941 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5942 ierr = PetscFree(reqs);CHKERRQ(ierr); 5943 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5944 #if defined(PETSC_USE_DEBUG) 5945 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5946 #endif 5947 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5948 } else if (oldranks) { 5949 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5950 } else { 5951 ranks_send_to_idx[0] = idxs[0]; 5952 } 5953 } 5954 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5955 /* clean up */ 5956 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5957 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5958 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5959 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5960 } 5961 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5962 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5963 5964 /* assemble parallel IS for sends */ 5965 i = 1; 5966 if (!color) i=0; 5967 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5968 PetscFunctionReturn(0); 5969 } 5970 5971 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5972 5973 #undef __FUNCT__ 5974 #define __FUNCT__ "MatISSubassemble" 5975 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5976 { 5977 Mat local_mat; 5978 IS is_sends_internal; 5979 PetscInt rows,cols,new_local_rows; 5980 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5981 PetscBool ismatis,isdense,newisdense,destroy_mat; 5982 ISLocalToGlobalMapping l2gmap; 5983 PetscInt* l2gmap_indices; 5984 const PetscInt* is_indices; 5985 MatType new_local_type; 5986 /* buffers */ 5987 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5988 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5989 PetscInt *recv_buffer_idxs_local; 5990 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5991 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5992 /* MPI */ 5993 MPI_Comm comm,comm_n; 5994 PetscSubcomm subcomm; 5995 PetscMPIInt n_sends,n_recvs,commsize; 5996 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5997 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5998 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5999 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6000 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6001 PetscErrorCode ierr; 6002 6003 PetscFunctionBegin; 6004 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6005 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6006 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6007 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6008 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6009 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6010 PetscValidLogicalCollectiveBool(mat,reuse,6); 6011 PetscValidLogicalCollectiveInt(mat,nis,8); 6012 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6013 if (nvecs) { 6014 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6015 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6016 } 6017 /* further checks */ 6018 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6019 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6020 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6021 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6022 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6023 if (reuse && *mat_n) { 6024 PetscInt mrows,mcols,mnrows,mncols; 6025 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6026 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6027 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6028 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6029 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6030 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6031 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6032 } 6033 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6034 PetscValidLogicalCollectiveInt(mat,bs,0); 6035 6036 /* prepare IS for sending if not provided */ 6037 if (!is_sends) { 6038 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6039 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6040 } else { 6041 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6042 is_sends_internal = is_sends; 6043 } 6044 6045 /* get comm */ 6046 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6047 6048 /* compute number of sends */ 6049 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6050 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6051 6052 /* compute number of receives */ 6053 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6054 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6055 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6056 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6057 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6058 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6059 ierr = PetscFree(iflags);CHKERRQ(ierr); 6060 6061 /* restrict comm if requested */ 6062 subcomm = 0; 6063 destroy_mat = PETSC_FALSE; 6064 if (restrict_comm) { 6065 PetscMPIInt color,subcommsize; 6066 6067 color = 0; 6068 if (restrict_full) { 6069 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6070 } else { 6071 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6072 } 6073 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6074 subcommsize = commsize - subcommsize; 6075 /* check if reuse has been requested */ 6076 if (reuse) { 6077 if (*mat_n) { 6078 PetscMPIInt subcommsize2; 6079 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6080 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6081 comm_n = PetscObjectComm((PetscObject)*mat_n); 6082 } else { 6083 comm_n = PETSC_COMM_SELF; 6084 } 6085 } else { /* MAT_INITIAL_MATRIX */ 6086 PetscMPIInt rank; 6087 6088 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6089 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6090 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6091 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6092 comm_n = PetscSubcommChild(subcomm); 6093 } 6094 /* flag to destroy *mat_n if not significative */ 6095 if (color) destroy_mat = PETSC_TRUE; 6096 } else { 6097 comm_n = comm; 6098 } 6099 6100 /* prepare send/receive buffers */ 6101 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6102 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6103 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6104 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6105 if (nis) { 6106 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6107 } 6108 6109 /* Get data from local matrices */ 6110 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6111 /* TODO: See below some guidelines on how to prepare the local buffers */ 6112 /* 6113 send_buffer_vals should contain the raw values of the local matrix 6114 send_buffer_idxs should contain: 6115 - MatType_PRIVATE type 6116 - PetscInt size_of_l2gmap 6117 - PetscInt global_row_indices[size_of_l2gmap] 6118 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6119 */ 6120 else { 6121 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6122 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6123 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6124 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6125 send_buffer_idxs[1] = i; 6126 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6127 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6128 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6129 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6130 for (i=0;i<n_sends;i++) { 6131 ilengths_vals[is_indices[i]] = len*len; 6132 ilengths_idxs[is_indices[i]] = len+2; 6133 } 6134 } 6135 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6136 /* additional is (if any) */ 6137 if (nis) { 6138 PetscMPIInt psum; 6139 PetscInt j; 6140 for (j=0,psum=0;j<nis;j++) { 6141 PetscInt plen; 6142 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6143 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6144 psum += len+1; /* indices + lenght */ 6145 } 6146 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6147 for (j=0,psum=0;j<nis;j++) { 6148 PetscInt plen; 6149 const PetscInt *is_array_idxs; 6150 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6151 send_buffer_idxs_is[psum] = plen; 6152 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6153 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6154 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6155 psum += plen+1; /* indices + lenght */ 6156 } 6157 for (i=0;i<n_sends;i++) { 6158 ilengths_idxs_is[is_indices[i]] = psum; 6159 } 6160 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6161 } 6162 6163 buf_size_idxs = 0; 6164 buf_size_vals = 0; 6165 buf_size_idxs_is = 0; 6166 buf_size_vecs = 0; 6167 for (i=0;i<n_recvs;i++) { 6168 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6169 buf_size_vals += (PetscInt)olengths_vals[i]; 6170 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6171 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6172 } 6173 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6174 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6175 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6176 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6177 6178 /* get new tags for clean communications */ 6179 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6180 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6181 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6182 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6183 6184 /* allocate for requests */ 6185 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6186 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6187 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6188 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6189 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6190 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6191 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6192 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6193 6194 /* communications */ 6195 ptr_idxs = recv_buffer_idxs; 6196 ptr_vals = recv_buffer_vals; 6197 ptr_idxs_is = recv_buffer_idxs_is; 6198 ptr_vecs = recv_buffer_vecs; 6199 for (i=0;i<n_recvs;i++) { 6200 source_dest = onodes[i]; 6201 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6202 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6203 ptr_idxs += olengths_idxs[i]; 6204 ptr_vals += olengths_vals[i]; 6205 if (nis) { 6206 source_dest = onodes_is[i]; 6207 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6208 ptr_idxs_is += olengths_idxs_is[i]; 6209 } 6210 if (nvecs) { 6211 source_dest = onodes[i]; 6212 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6213 ptr_vecs += olengths_idxs[i]-2; 6214 } 6215 } 6216 for (i=0;i<n_sends;i++) { 6217 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6218 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6219 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6220 if (nis) { 6221 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6222 } 6223 if (nvecs) { 6224 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6225 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6226 } 6227 } 6228 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6229 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6230 6231 /* assemble new l2g map */ 6232 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6233 ptr_idxs = recv_buffer_idxs; 6234 new_local_rows = 0; 6235 for (i=0;i<n_recvs;i++) { 6236 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6237 ptr_idxs += olengths_idxs[i]; 6238 } 6239 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6240 ptr_idxs = recv_buffer_idxs; 6241 new_local_rows = 0; 6242 for (i=0;i<n_recvs;i++) { 6243 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6244 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6245 ptr_idxs += olengths_idxs[i]; 6246 } 6247 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6248 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6249 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6250 6251 /* infer new local matrix type from received local matrices type */ 6252 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6253 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6254 if (n_recvs) { 6255 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6256 ptr_idxs = recv_buffer_idxs; 6257 for (i=0;i<n_recvs;i++) { 6258 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6259 new_local_type_private = MATAIJ_PRIVATE; 6260 break; 6261 } 6262 ptr_idxs += olengths_idxs[i]; 6263 } 6264 switch (new_local_type_private) { 6265 case MATDENSE_PRIVATE: 6266 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6267 new_local_type = MATSEQAIJ; 6268 bs = 1; 6269 } else { /* if I receive only 1 dense matrix */ 6270 new_local_type = MATSEQDENSE; 6271 bs = 1; 6272 } 6273 break; 6274 case MATAIJ_PRIVATE: 6275 new_local_type = MATSEQAIJ; 6276 bs = 1; 6277 break; 6278 case MATBAIJ_PRIVATE: 6279 new_local_type = MATSEQBAIJ; 6280 break; 6281 case MATSBAIJ_PRIVATE: 6282 new_local_type = MATSEQSBAIJ; 6283 break; 6284 default: 6285 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6286 break; 6287 } 6288 } else { /* by default, new_local_type is seqdense */ 6289 new_local_type = MATSEQDENSE; 6290 bs = 1; 6291 } 6292 6293 /* create MATIS object if needed */ 6294 if (!reuse) { 6295 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6296 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6297 } else { 6298 /* it also destroys the local matrices */ 6299 if (*mat_n) { 6300 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6301 } else { /* this is a fake object */ 6302 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6303 } 6304 } 6305 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6306 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6307 6308 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6309 6310 /* Global to local map of received indices */ 6311 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6312 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6313 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6314 6315 /* restore attributes -> type of incoming data and its size */ 6316 buf_size_idxs = 0; 6317 for (i=0;i<n_recvs;i++) { 6318 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6319 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6320 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6321 } 6322 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6323 6324 /* set preallocation */ 6325 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6326 if (!newisdense) { 6327 PetscInt *new_local_nnz=0; 6328 6329 ptr_idxs = recv_buffer_idxs_local; 6330 if (n_recvs) { 6331 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6332 } 6333 for (i=0;i<n_recvs;i++) { 6334 PetscInt j; 6335 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6336 for (j=0;j<*(ptr_idxs+1);j++) { 6337 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6338 } 6339 } else { 6340 /* TODO */ 6341 } 6342 ptr_idxs += olengths_idxs[i]; 6343 } 6344 if (new_local_nnz) { 6345 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6346 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6347 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6348 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6349 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6350 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6351 } else { 6352 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6353 } 6354 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6355 } else { 6356 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6357 } 6358 6359 /* set values */ 6360 ptr_vals = recv_buffer_vals; 6361 ptr_idxs = recv_buffer_idxs_local; 6362 for (i=0;i<n_recvs;i++) { 6363 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6364 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6365 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6366 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6367 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6368 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6369 } else { 6370 /* TODO */ 6371 } 6372 ptr_idxs += olengths_idxs[i]; 6373 ptr_vals += olengths_vals[i]; 6374 } 6375 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6376 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6377 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6378 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6379 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6380 6381 #if 0 6382 if (!restrict_comm) { /* check */ 6383 Vec lvec,rvec; 6384 PetscReal infty_error; 6385 6386 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6387 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6388 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6389 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6390 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6391 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6392 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6393 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6394 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6395 } 6396 #endif 6397 6398 /* assemble new additional is (if any) */ 6399 if (nis) { 6400 PetscInt **temp_idxs,*count_is,j,psum; 6401 6402 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6403 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6404 ptr_idxs = recv_buffer_idxs_is; 6405 psum = 0; 6406 for (i=0;i<n_recvs;i++) { 6407 for (j=0;j<nis;j++) { 6408 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6409 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6410 psum += plen; 6411 ptr_idxs += plen+1; /* shift pointer to received data */ 6412 } 6413 } 6414 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6415 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6416 for (i=1;i<nis;i++) { 6417 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6418 } 6419 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6420 ptr_idxs = recv_buffer_idxs_is; 6421 for (i=0;i<n_recvs;i++) { 6422 for (j=0;j<nis;j++) { 6423 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6424 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6425 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6426 ptr_idxs += plen+1; /* shift pointer to received data */ 6427 } 6428 } 6429 for (i=0;i<nis;i++) { 6430 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6431 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6432 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6433 } 6434 ierr = PetscFree(count_is);CHKERRQ(ierr); 6435 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6436 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6437 } 6438 /* free workspace */ 6439 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6440 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6441 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6442 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6443 if (isdense) { 6444 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6445 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6446 } else { 6447 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6448 } 6449 if (nis) { 6450 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6451 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6452 } 6453 6454 if (nvecs) { 6455 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6456 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6457 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6458 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6459 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6460 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6461 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6462 /* set values */ 6463 ptr_vals = recv_buffer_vecs; 6464 ptr_idxs = recv_buffer_idxs_local; 6465 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6466 for (i=0;i<n_recvs;i++) { 6467 PetscInt j; 6468 for (j=0;j<*(ptr_idxs+1);j++) { 6469 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6470 } 6471 ptr_idxs += olengths_idxs[i]; 6472 ptr_vals += olengths_idxs[i]-2; 6473 } 6474 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6475 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6476 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6477 } 6478 6479 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6480 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6481 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6482 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6483 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6484 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6485 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6486 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6487 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6488 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6489 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6490 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6491 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6492 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6493 ierr = PetscFree(onodes);CHKERRQ(ierr); 6494 if (nis) { 6495 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6496 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6497 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6498 } 6499 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6500 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6501 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6502 for (i=0;i<nis;i++) { 6503 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6504 } 6505 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6506 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6507 } 6508 *mat_n = NULL; 6509 } 6510 PetscFunctionReturn(0); 6511 } 6512 6513 /* temporary hack into ksp private data structure */ 6514 #include <petsc/private/kspimpl.h> 6515 6516 #undef __FUNCT__ 6517 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6518 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6519 { 6520 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6521 PC_IS *pcis = (PC_IS*)pc->data; 6522 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6523 Mat coarsedivudotp = NULL; 6524 MatNullSpace CoarseNullSpace = NULL; 6525 ISLocalToGlobalMapping coarse_islg; 6526 IS coarse_is,*isarray; 6527 PetscInt i,im_active=-1,active_procs=-1; 6528 PetscInt nis,nisdofs,nisneu,nisvert; 6529 PC pc_temp; 6530 PCType coarse_pc_type; 6531 KSPType coarse_ksp_type; 6532 PetscBool multilevel_requested,multilevel_allowed; 6533 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6534 Mat t_coarse_mat_is; 6535 PetscInt ncoarse; 6536 PetscBool compute_vecs = PETSC_FALSE; 6537 PetscScalar *array; 6538 MatReuse coarse_mat_reuse; 6539 PetscBool restr, full_restr, have_void; 6540 PetscErrorCode ierr; 6541 6542 PetscFunctionBegin; 6543 /* Assign global numbering to coarse dofs */ 6544 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6545 PetscInt ocoarse_size; 6546 compute_vecs = PETSC_TRUE; 6547 ocoarse_size = pcbddc->coarse_size; 6548 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6549 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6550 /* see if we can avoid some work */ 6551 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6552 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6553 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6554 PC pc; 6555 PetscBool isbddc; 6556 6557 /* temporary workaround since PCBDDC does not have a reset method so far */ 6558 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6559 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6560 if (isbddc) { 6561 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6562 } else { 6563 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6564 } 6565 coarse_reuse = PETSC_FALSE; 6566 } else { /* we can safely reuse already computed coarse matrix */ 6567 coarse_reuse = PETSC_TRUE; 6568 } 6569 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6570 coarse_reuse = PETSC_FALSE; 6571 } 6572 /* reset any subassembling information */ 6573 if (!coarse_reuse || pcbddc->recompute_topography) { 6574 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6575 } 6576 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6577 coarse_reuse = PETSC_TRUE; 6578 } 6579 /* assemble coarse matrix */ 6580 if (coarse_reuse && pcbddc->coarse_ksp) { 6581 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6582 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6583 coarse_mat_reuse = MAT_REUSE_MATRIX; 6584 } else { 6585 coarse_mat = NULL; 6586 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6587 } 6588 6589 /* creates temporary l2gmap and IS for coarse indexes */ 6590 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6591 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6592 6593 /* creates temporary MATIS object for coarse matrix */ 6594 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6595 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6596 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6597 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6598 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6599 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6600 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6601 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6602 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6603 6604 /* count "active" (i.e. with positive local size) and "void" processes */ 6605 im_active = !!(pcis->n); 6606 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6607 6608 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6609 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6610 /* full_restr : just use the receivers from the subassembling pattern */ 6611 coarse_mat_is = NULL; 6612 multilevel_allowed = PETSC_FALSE; 6613 multilevel_requested = PETSC_FALSE; 6614 full_restr = PETSC_TRUE; 6615 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6616 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6617 if (multilevel_requested) { 6618 ncoarse = active_procs/pcbddc->coarsening_ratio; 6619 restr = PETSC_FALSE; 6620 full_restr = PETSC_FALSE; 6621 } else { 6622 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6623 restr = PETSC_TRUE; 6624 full_restr = PETSC_TRUE; 6625 } 6626 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6627 ncoarse = PetscMax(1,ncoarse); 6628 if (!pcbddc->coarse_subassembling) { 6629 if (pcbddc->coarsening_ratio > 1) { 6630 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6631 } else { 6632 PetscMPIInt size,rank; 6633 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6634 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6635 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6636 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6637 } 6638 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6639 PetscInt psum; 6640 PetscMPIInt size; 6641 if (pcbddc->coarse_ksp) psum = 1; 6642 else psum = 0; 6643 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6644 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6645 if (ncoarse < size) have_void = PETSC_TRUE; 6646 } 6647 /* determine if we can go multilevel */ 6648 if (multilevel_requested) { 6649 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6650 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6651 } 6652 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6653 6654 /* dump subassembling pattern */ 6655 if (pcbddc->dbg_flag && multilevel_allowed) { 6656 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6657 } 6658 6659 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6660 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6661 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6662 const PetscInt *idxs; 6663 ISLocalToGlobalMapping tmap; 6664 6665 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6666 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6667 /* allocate space for temporary storage */ 6668 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6669 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6670 /* allocate for IS array */ 6671 nisdofs = pcbddc->n_ISForDofsLocal; 6672 nisneu = !!pcbddc->NeumannBoundariesLocal; 6673 nisvert = 0; /* nisvert is not used */ 6674 nis = nisdofs + nisneu + nisvert; 6675 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6676 /* dofs splitting */ 6677 for (i=0;i<nisdofs;i++) { 6678 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6679 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6680 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6681 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6682 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6683 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6684 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6685 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6686 } 6687 /* neumann boundaries */ 6688 if (pcbddc->NeumannBoundariesLocal) { 6689 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6690 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6691 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6692 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6693 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6694 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6695 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6696 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6697 } 6698 /* free memory */ 6699 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6700 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6701 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6702 } else { 6703 nis = 0; 6704 nisdofs = 0; 6705 nisneu = 0; 6706 nisvert = 0; 6707 isarray = NULL; 6708 } 6709 /* destroy no longer needed map */ 6710 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6711 6712 /* subassemble */ 6713 if (multilevel_allowed) { 6714 Vec vp[1]; 6715 PetscInt nvecs = 0; 6716 PetscBool reuse,reuser; 6717 6718 if (coarse_mat) reuse = PETSC_TRUE; 6719 else reuse = PETSC_FALSE; 6720 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6721 vp[0] = NULL; 6722 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6723 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6724 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6725 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6726 nvecs = 1; 6727 6728 if (pcbddc->divudotp) { 6729 Mat B,loc_divudotp; 6730 Vec v,p; 6731 IS dummy; 6732 PetscInt np; 6733 6734 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6735 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6736 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6737 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6738 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6739 ierr = VecSet(p,1.);CHKERRQ(ierr); 6740 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6741 ierr = VecDestroy(&p);CHKERRQ(ierr); 6742 ierr = MatDestroy(&B);CHKERRQ(ierr); 6743 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6744 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6745 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6746 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6747 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6748 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6749 ierr = VecDestroy(&v);CHKERRQ(ierr); 6750 } 6751 } 6752 if (reuser) { 6753 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6754 } else { 6755 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6756 } 6757 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6758 PetscScalar *arraym,*arrayv; 6759 PetscInt nl; 6760 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6761 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6762 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6763 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 6764 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 6765 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 6766 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6767 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 6768 } else { 6769 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 6770 } 6771 } else { 6772 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 6773 } 6774 if (coarse_mat_is || coarse_mat) { 6775 PetscMPIInt size; 6776 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 6777 if (!multilevel_allowed) { 6778 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 6779 } else { 6780 Mat A; 6781 6782 /* if this matrix is present, it means we are not reusing the coarse matrix */ 6783 if (coarse_mat_is) { 6784 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 6785 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 6786 coarse_mat = coarse_mat_is; 6787 } 6788 /* be sure we don't have MatSeqDENSE as local mat */ 6789 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 6790 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 6791 } 6792 } 6793 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 6794 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 6795 6796 /* create local to global scatters for coarse problem */ 6797 if (compute_vecs) { 6798 PetscInt lrows; 6799 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 6800 if (coarse_mat) { 6801 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 6802 } else { 6803 lrows = 0; 6804 } 6805 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 6806 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 6807 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 6808 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6809 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6810 } 6811 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 6812 6813 /* set defaults for coarse KSP and PC */ 6814 if (multilevel_allowed) { 6815 coarse_ksp_type = KSPRICHARDSON; 6816 coarse_pc_type = PCBDDC; 6817 } else { 6818 coarse_ksp_type = KSPPREONLY; 6819 coarse_pc_type = PCREDUNDANT; 6820 } 6821 6822 /* print some info if requested */ 6823 if (pcbddc->dbg_flag) { 6824 if (!multilevel_allowed) { 6825 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6826 if (multilevel_requested) { 6827 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6828 } else if (pcbddc->max_levels) { 6829 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6830 } 6831 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6832 } 6833 } 6834 6835 /* create the coarse KSP object only once with defaults */ 6836 if (coarse_mat) { 6837 PetscViewer dbg_viewer = NULL; 6838 if (pcbddc->dbg_flag) { 6839 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6840 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6841 } 6842 if (!pcbddc->coarse_ksp) { 6843 char prefix[256],str_level[16]; 6844 size_t len; 6845 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6846 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6847 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6848 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6849 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6850 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6851 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6852 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6853 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6854 /* prefix */ 6855 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6856 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6857 if (!pcbddc->current_level) { 6858 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6859 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6860 } else { 6861 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6862 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6863 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6864 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6865 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6866 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6867 } 6868 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6869 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6870 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6871 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6872 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6873 /* allow user customization */ 6874 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6875 } 6876 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6877 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6878 if (nisdofs) { 6879 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6880 for (i=0;i<nisdofs;i++) { 6881 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6882 } 6883 } 6884 if (nisneu) { 6885 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6886 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6887 } 6888 if (nisvert) { 6889 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6890 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6891 } 6892 6893 /* get some info after set from options */ 6894 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6895 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6896 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6897 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6898 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6899 isbddc = PETSC_FALSE; 6900 } 6901 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6902 if (isredundant) { 6903 KSP inner_ksp; 6904 PC inner_pc; 6905 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6906 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6907 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6908 } 6909 6910 /* parameters which miss an API */ 6911 if (isbddc) { 6912 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6913 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6914 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6915 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6916 if (pcbddc_coarse->benign_saddle_point) { 6917 Mat coarsedivudotp_is; 6918 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6919 IS row,col; 6920 const PetscInt *gidxs; 6921 PetscInt n,st,M,N; 6922 6923 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6924 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6925 st = st-n; 6926 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6927 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6928 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6929 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6930 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6931 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6932 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6933 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6934 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6935 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6936 ierr = ISDestroy(&row);CHKERRQ(ierr); 6937 ierr = ISDestroy(&col);CHKERRQ(ierr); 6938 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6939 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6940 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6941 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6942 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6943 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6944 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6945 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6946 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6947 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6948 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6949 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6950 } 6951 } 6952 6953 /* propagate symmetry info of coarse matrix */ 6954 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6955 if (pc->pmat->symmetric_set) { 6956 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6957 } 6958 if (pc->pmat->hermitian_set) { 6959 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6960 } 6961 if (pc->pmat->spd_set) { 6962 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6963 } 6964 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6965 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6966 } 6967 /* set operators */ 6968 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6969 if (pcbddc->dbg_flag) { 6970 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6971 } 6972 } 6973 ierr = PetscFree(isarray);CHKERRQ(ierr); 6974 #if 0 6975 { 6976 PetscViewer viewer; 6977 char filename[256]; 6978 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6979 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6980 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6981 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6982 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6983 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6984 } 6985 #endif 6986 6987 if (pcbddc->coarse_ksp) { 6988 Vec crhs,csol; 6989 6990 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6991 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6992 if (!csol) { 6993 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6994 } 6995 if (!crhs) { 6996 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6997 } 6998 } 6999 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7000 7001 /* compute null space for coarse solver if the benign trick has been requested */ 7002 if (pcbddc->benign_null) { 7003 7004 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7005 for (i=0;i<pcbddc->benign_n;i++) { 7006 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7007 } 7008 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7009 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7010 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7011 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7012 if (coarse_mat) { 7013 Vec nullv; 7014 PetscScalar *array,*array2; 7015 PetscInt nl; 7016 7017 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7018 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7019 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7020 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7021 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7022 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7023 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7024 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7025 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7026 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7027 } 7028 } 7029 7030 if (pcbddc->coarse_ksp) { 7031 PetscBool ispreonly; 7032 7033 if (CoarseNullSpace) { 7034 PetscBool isnull; 7035 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7036 if (isnull) { 7037 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7038 } 7039 /* TODO: add local nullspaces (if any) */ 7040 } 7041 /* setup coarse ksp */ 7042 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7043 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7044 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7045 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7046 KSP check_ksp; 7047 KSPType check_ksp_type; 7048 PC check_pc; 7049 Vec check_vec,coarse_vec; 7050 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7051 PetscInt its; 7052 PetscBool compute_eigs; 7053 PetscReal *eigs_r,*eigs_c; 7054 PetscInt neigs; 7055 const char *prefix; 7056 7057 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7058 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7059 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7060 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7061 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7062 /* prevent from setup unneeded object */ 7063 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7064 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7065 if (ispreonly) { 7066 check_ksp_type = KSPPREONLY; 7067 compute_eigs = PETSC_FALSE; 7068 } else { 7069 check_ksp_type = KSPGMRES; 7070 compute_eigs = PETSC_TRUE; 7071 } 7072 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7073 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7074 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7075 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7076 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7077 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7078 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7079 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7080 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7081 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7082 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7083 /* create random vec */ 7084 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7085 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7086 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7087 /* solve coarse problem */ 7088 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7089 /* set eigenvalue estimation if preonly has not been requested */ 7090 if (compute_eigs) { 7091 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7092 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7093 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7094 if (neigs) { 7095 lambda_max = eigs_r[neigs-1]; 7096 lambda_min = eigs_r[0]; 7097 if (pcbddc->use_coarse_estimates) { 7098 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7099 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7100 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7101 } 7102 } 7103 } 7104 } 7105 7106 /* check coarse problem residual error */ 7107 if (pcbddc->dbg_flag) { 7108 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7109 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7110 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7111 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7112 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7113 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7114 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7115 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7116 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7117 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7118 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7119 if (CoarseNullSpace) { 7120 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7121 } 7122 if (compute_eigs) { 7123 PetscReal lambda_max_s,lambda_min_s; 7124 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7125 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7126 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7127 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7128 for (i=0;i<neigs;i++) { 7129 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7130 } 7131 } 7132 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7133 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7134 } 7135 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7136 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7137 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7138 if (compute_eigs) { 7139 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7140 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7141 } 7142 } 7143 } 7144 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7145 /* print additional info */ 7146 if (pcbddc->dbg_flag) { 7147 /* waits until all processes reaches this point */ 7148 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7150 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7151 } 7152 7153 /* free memory */ 7154 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7155 PetscFunctionReturn(0); 7156 } 7157 7158 #undef __FUNCT__ 7159 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7160 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7161 { 7162 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7163 PC_IS* pcis = (PC_IS*)pc->data; 7164 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7165 IS subset,subset_mult,subset_n; 7166 PetscInt local_size,coarse_size=0; 7167 PetscInt *local_primal_indices=NULL; 7168 const PetscInt *t_local_primal_indices; 7169 PetscErrorCode ierr; 7170 7171 PetscFunctionBegin; 7172 /* Compute global number of coarse dofs */ 7173 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7174 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7175 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7176 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7177 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7178 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7179 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7180 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7181 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7182 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7183 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7184 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7185 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7186 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7187 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7188 7189 /* check numbering */ 7190 if (pcbddc->dbg_flag) { 7191 PetscScalar coarsesum,*array,*array2; 7192 PetscInt i; 7193 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7194 7195 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7197 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7198 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7199 /* counter */ 7200 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7201 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7202 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7203 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7204 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7205 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7206 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7207 for (i=0;i<pcbddc->local_primal_size;i++) { 7208 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7209 } 7210 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7211 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7212 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7213 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7214 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7215 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7216 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7217 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7218 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7219 for (i=0;i<pcis->n;i++) { 7220 if (array[i] != 0.0 && array[i] != array2[i]) { 7221 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7222 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7223 set_error = PETSC_TRUE; 7224 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7225 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7226 } 7227 } 7228 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7229 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7230 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7231 for (i=0;i<pcis->n;i++) { 7232 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7233 } 7234 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7235 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7236 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7237 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7238 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7239 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7240 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7241 PetscInt *gidxs; 7242 7243 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7244 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7245 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7246 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7247 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7248 for (i=0;i<pcbddc->local_primal_size;i++) { 7249 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7250 } 7251 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7252 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7253 } 7254 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7255 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7256 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7257 } 7258 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7259 /* get back data */ 7260 *coarse_size_n = coarse_size; 7261 *local_primal_indices_n = local_primal_indices; 7262 PetscFunctionReturn(0); 7263 } 7264 7265 #undef __FUNCT__ 7266 #define __FUNCT__ "PCBDDCGlobalToLocal" 7267 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7268 { 7269 IS localis_t; 7270 PetscInt i,lsize,*idxs,n; 7271 PetscScalar *vals; 7272 PetscErrorCode ierr; 7273 7274 PetscFunctionBegin; 7275 /* get indices in local ordering exploiting local to global map */ 7276 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7277 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7278 for (i=0;i<lsize;i++) vals[i] = 1.0; 7279 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7280 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7281 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7282 if (idxs) { /* multilevel guard */ 7283 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7284 } 7285 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7286 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7287 ierr = PetscFree(vals);CHKERRQ(ierr); 7288 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7289 /* now compute set in local ordering */ 7290 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7291 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7292 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7293 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7294 for (i=0,lsize=0;i<n;i++) { 7295 if (PetscRealPart(vals[i]) > 0.5) { 7296 lsize++; 7297 } 7298 } 7299 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7300 for (i=0,lsize=0;i<n;i++) { 7301 if (PetscRealPart(vals[i]) > 0.5) { 7302 idxs[lsize++] = i; 7303 } 7304 } 7305 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7306 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7307 *localis = localis_t; 7308 PetscFunctionReturn(0); 7309 } 7310 7311 #undef __FUNCT__ 7312 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7313 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7314 { 7315 PC_IS *pcis=(PC_IS*)pc->data; 7316 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7317 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7318 Mat S_j; 7319 PetscInt *used_xadj,*used_adjncy; 7320 PetscBool free_used_adj; 7321 PetscErrorCode ierr; 7322 7323 PetscFunctionBegin; 7324 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7325 free_used_adj = PETSC_FALSE; 7326 if (pcbddc->sub_schurs_layers == -1) { 7327 used_xadj = NULL; 7328 used_adjncy = NULL; 7329 } else { 7330 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7331 used_xadj = pcbddc->mat_graph->xadj; 7332 used_adjncy = pcbddc->mat_graph->adjncy; 7333 } else if (pcbddc->computed_rowadj) { 7334 used_xadj = pcbddc->mat_graph->xadj; 7335 used_adjncy = pcbddc->mat_graph->adjncy; 7336 } else { 7337 PetscBool flg_row=PETSC_FALSE; 7338 const PetscInt *xadj,*adjncy; 7339 PetscInt nvtxs; 7340 7341 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7342 if (flg_row) { 7343 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7344 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7345 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7346 free_used_adj = PETSC_TRUE; 7347 } else { 7348 pcbddc->sub_schurs_layers = -1; 7349 used_xadj = NULL; 7350 used_adjncy = NULL; 7351 } 7352 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7353 } 7354 } 7355 7356 /* setup sub_schurs data */ 7357 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7358 if (!sub_schurs->schur_explicit) { 7359 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7360 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7361 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7362 } else { 7363 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7364 PetscBool isseqaij,need_change = PETSC_FALSE; 7365 PetscInt benign_n; 7366 Mat change = NULL; 7367 Vec scaling = NULL; 7368 IS change_primal = NULL; 7369 7370 if (!pcbddc->use_vertices && reuse_solvers) { 7371 PetscInt n_vertices; 7372 7373 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7374 reuse_solvers = (PetscBool)!n_vertices; 7375 } 7376 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7377 if (!isseqaij) { 7378 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7379 if (matis->A == pcbddc->local_mat) { 7380 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7381 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7382 } else { 7383 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7384 } 7385 } 7386 if (!pcbddc->benign_change_explicit) { 7387 benign_n = pcbddc->benign_n; 7388 } else { 7389 benign_n = 0; 7390 } 7391 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7392 We need a global reduction to avoid possible deadlocks. 7393 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7394 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7395 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7396 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7397 need_change = (PetscBool)(!need_change); 7398 } 7399 /* If the user defines additional constraints, we import them here. 7400 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7401 if (need_change) { 7402 PC_IS *pcisf; 7403 PC_BDDC *pcbddcf; 7404 PC pcf; 7405 7406 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7407 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7408 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7409 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7410 /* hacks */ 7411 pcisf = (PC_IS*)pcf->data; 7412 pcisf->is_B_local = pcis->is_B_local; 7413 pcisf->vec1_N = pcis->vec1_N; 7414 pcisf->BtoNmap = pcis->BtoNmap; 7415 pcisf->n = pcis->n; 7416 pcisf->n_B = pcis->n_B; 7417 pcbddcf = (PC_BDDC*)pcf->data; 7418 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7419 pcbddcf->mat_graph = pcbddc->mat_graph; 7420 pcbddcf->use_faces = PETSC_TRUE; 7421 pcbddcf->use_change_of_basis = PETSC_TRUE; 7422 pcbddcf->use_change_on_faces = PETSC_TRUE; 7423 pcbddcf->use_qr_single = PETSC_TRUE; 7424 pcbddcf->fake_change = PETSC_TRUE; 7425 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7426 /* store information on primal vertices and change of basis (in local numbering) */ 7427 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7428 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7429 change = pcbddcf->ConstraintMatrix; 7430 pcbddcf->ConstraintMatrix = NULL; 7431 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7432 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7433 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7434 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7435 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7436 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7437 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7438 pcf->ops->destroy = NULL; 7439 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7440 } 7441 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7442 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7443 ierr = MatDestroy(&change);CHKERRQ(ierr); 7444 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7445 } 7446 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7447 7448 /* free adjacency */ 7449 if (free_used_adj) { 7450 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7451 } 7452 PetscFunctionReturn(0); 7453 } 7454 7455 #undef __FUNCT__ 7456 #define __FUNCT__ "PCBDDCInitSubSchurs" 7457 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7458 { 7459 PC_IS *pcis=(PC_IS*)pc->data; 7460 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7461 PCBDDCGraph graph; 7462 PetscErrorCode ierr; 7463 7464 PetscFunctionBegin; 7465 /* attach interface graph for determining subsets */ 7466 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7467 IS verticesIS,verticescomm; 7468 PetscInt vsize,*idxs; 7469 7470 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7471 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7472 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7473 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7474 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7475 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7476 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7477 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7478 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7479 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7480 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7481 } else { 7482 graph = pcbddc->mat_graph; 7483 } 7484 /* print some info */ 7485 if (pcbddc->dbg_flag) { 7486 IS vertices; 7487 PetscInt nv,nedges,nfaces; 7488 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7489 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7490 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7491 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7492 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7493 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7494 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7495 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7496 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7497 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7498 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7499 } 7500 7501 /* sub_schurs init */ 7502 if (!pcbddc->sub_schurs) { 7503 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7504 } 7505 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7506 7507 /* free graph struct */ 7508 if (pcbddc->sub_schurs_rebuild) { 7509 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7510 } 7511 PetscFunctionReturn(0); 7512 } 7513 7514 #undef __FUNCT__ 7515 #define __FUNCT__ "PCBDDCCheckOperator" 7516 PetscErrorCode PCBDDCCheckOperator(PC pc) 7517 { 7518 PC_IS *pcis=(PC_IS*)pc->data; 7519 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7520 PetscErrorCode ierr; 7521 7522 PetscFunctionBegin; 7523 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7524 IS zerodiag = NULL; 7525 Mat S_j,B0_B=NULL; 7526 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7527 PetscScalar *p0_check,*array,*array2; 7528 PetscReal norm; 7529 PetscInt i; 7530 7531 /* B0 and B0_B */ 7532 if (zerodiag) { 7533 IS dummy; 7534 7535 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7536 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7537 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7538 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7539 } 7540 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7541 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7542 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7543 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7544 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7545 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7546 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7547 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7548 /* S_j */ 7549 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7550 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7551 7552 /* mimic vector in \widetilde{W}_\Gamma */ 7553 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7554 /* continuous in primal space */ 7555 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7556 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7557 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7558 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7559 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7560 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7561 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7562 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7563 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7564 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7565 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7566 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7567 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7568 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7569 7570 /* assemble rhs for coarse problem */ 7571 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7572 /* local with Schur */ 7573 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7574 if (zerodiag) { 7575 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7576 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7577 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7578 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7579 } 7580 /* sum on primal nodes the local contributions */ 7581 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7582 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7583 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7584 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7585 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7586 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7587 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7588 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7589 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7590 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7591 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7592 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7593 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7594 /* scale primal nodes (BDDC sums contibutions) */ 7595 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7596 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7597 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7598 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7599 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7600 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7601 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7602 /* global: \widetilde{B0}_B w_\Gamma */ 7603 if (zerodiag) { 7604 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7605 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7606 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7607 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7608 } 7609 /* BDDC */ 7610 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7611 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7612 7613 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7614 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7615 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7616 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7617 for (i=0;i<pcbddc->benign_n;i++) { 7618 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7619 } 7620 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7621 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7622 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7623 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7624 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7625 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7626 } 7627 PetscFunctionReturn(0); 7628 } 7629