1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 84 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 85 86 /* gradients */ 87 ptr = work + 5*esize; 88 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 89 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 90 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 91 ierr = MatDestroy(&GE);CHKERRQ(ierr); 92 93 /* constants */ 94 ptr += rsize*csize; 95 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 96 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 97 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 98 ierr = MatDestroy(&GE);CHKERRQ(ierr); 99 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 100 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "PCBDDCNedelecSupport" 106 PetscErrorCode PCBDDCNedelecSupport(PC pc) 107 { 108 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 109 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 110 Mat G,T,conn,lG,lGt,lGis,lGall,lGe; 111 MatNullSpace nnsp; 112 Vec tvec,*quads; 113 PetscSF sfv; 114 ISLocalToGlobalMapping el2g,vl2g; 115 MPI_Comm comm; 116 IS lned,primals; 117 IS *eedges,*extrows,*extcols; 118 PetscBT btv,bte,btvc,btb,btvcand,btvi; 119 PetscScalar *vals,*work; 120 PetscReal *rwork; 121 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 122 PetscInt ne,nv,Ne,Nv,Le,Lv,order; 123 PetscInt n_neigh,*neigh,*n_shared,**shared; 124 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 125 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 126 PetscInt *sfvleaves,*sfvroots; 127 PetscBool print,eerr,done,lrc[2],conforming,ismpiaij; 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 /* test variable order code and print debug info TODO: to be removed */ 132 print = PETSC_FALSE; 133 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 134 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 135 136 /* Return to caller if there are no edges in the decomposition */ 137 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 138 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 139 ierr = ISLocalToGlobalMappingGetSize(el2g,&ne);CHKERRQ(ierr); 140 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 141 lrc[0] = PETSC_FALSE; 142 for (i=0;i<ne;i++) { 143 if (PetscRealPart(vals[i]) > 2.) { 144 lrc[0] = PETSC_TRUE; 145 break; 146 } 147 } 148 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 149 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 150 if (!lrc[1]) PetscFunctionReturn(0); 151 152 /* Get relevant objects */ 153 G = pcbddc->discretegradient; 154 order = pcbddc->nedorder; 155 conforming = pcbddc->conforming; 156 157 /* Compute local and global sizes of egde dofs and nodal dofs */ 158 ierr = MatGetSize(G,&Ne,&Nv);CHKERRQ(ierr); 159 ierr = MatGetLocalSize(G,&Le,&Lv);CHKERRQ(ierr); 160 161 /* Sanity checks */ 162 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 163 ierr = PetscObjectTypeCompare((PetscObject)G,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 164 if (!ismpiaij) SETERRQ1(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with discrete gradient of type %s. Please use MPIAIJ",((PetscObject)G)->type_name); 165 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 166 if (order && Ne%order) SETERRQ2(comm,PETSC_ERR_USER,"The number of edge dofs %d it's not a multiple of the order %d",Ne,order); 167 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 168 ierr = MatGetSize(pc->pmat,&i,NULL);CHKERRQ(ierr); 169 if (Ne != i) SETERRQ2(comm,PETSC_ERR_USER,"Global matrix sizes should match! Number of rows of G %d differs from that of A %d",Ne,i); 170 ierr = MatGetLocalSize(pc->pmat,&i,NULL);CHKERRQ(ierr); 171 if (Le != i) SETERRQ2(comm,PETSC_ERR_USER,"Local matrix sizes should match! Number of local rows of G %d differs from that of A %d",Le,i); 172 173 /* Drop connections for interior edges (this modifies G) */ 174 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 175 ierr = VecScatterBegin(matis->rctx,matis->counter,tvec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 176 ierr = VecScatterEnd(matis->rctx,matis->counter,tvec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 177 ierr = PetscMalloc1(Le,&marks);CHKERRQ(ierr); 178 ierr = VecGetOwnershipRange(tvec,&rst,NULL);CHKERRQ(ierr); 179 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&vals);CHKERRQ(ierr); 180 cum = 0; 181 for (i=0;i<Le;i++) if (PetscRealPart(vals[i]) < 1.5) marks[cum++] = i+rst; 182 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&vals);CHKERRQ(ierr); 183 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 184 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 185 ierr = MatZeroRows(G,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 186 ierr = PetscFree(marks);CHKERRQ(ierr); 187 188 /* Extract subdomain relevant rows of G */ 189 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 190 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 191 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 193 ierr = ISDestroy(&lned);CHKERRQ(ierr); 194 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 195 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 196 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 197 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 198 if (print) { 199 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 200 ierr = MatView(lG,NULL);CHKERRQ(ierr); 201 } 202 203 /* SF for nodal communications */ 204 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 205 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 207 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 208 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 209 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 210 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 211 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 212 213 /* Destroy temporary G created in MATIS format */ 214 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 215 216 /* Analyze the edge-nodes connections (duplicate lG) */ 217 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 218 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 219 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 220 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 221 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 222 /* need to import the boundary specification to ensure the 223 proper detection of coarse edges' endpoints */ 224 if (pcbddc->DirichletBoundariesLocal) { 225 ierr = ISGetLocalSize(pcbddc->DirichletBoundariesLocal,&cum);CHKERRQ(ierr); 226 ierr = ISGetIndices(pcbddc->DirichletBoundariesLocal,&idxs);CHKERRQ(ierr); 227 for (i=0;i<cum;i++) { 228 if (idxs[i] >= 0) { 229 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 230 } 231 } 232 ierr = ISRestoreIndices(pcbddc->DirichletBoundariesLocal,&idxs);CHKERRQ(ierr); 233 } 234 if (pcbddc->NeumannBoundariesLocal) { 235 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&cum);CHKERRQ(ierr); 236 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 237 for (i=0;i<cum;i++) { 238 if (idxs[i] >= 0) { 239 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 240 } 241 } 242 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 243 } 244 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 245 /* need to remove coarse faces' dofs to ensure the 246 proper detection of coarse edges' endpoints */ 247 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 248 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 249 for (i=1;i<n_neigh;i++) { 250 PetscInt j; 251 for (j=0;j<n_shared[i];j++) marks[shared[i][j]]++; 252 } 253 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 254 cum = 0; 255 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 256 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 257 for (i=0;i<ne;i++) { 258 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 259 if (!marks[i] || (marks[i] == 1 && !PetscBTLookup(btb,i))) { 260 marks[cum++] = i; 261 continue; 262 } 263 /* set badly connected edge dofs as primal */ 264 if (!conforming && ii[i+1]-ii[i] != order + 1) { 265 marks[cum++] = i; 266 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 267 for (j=ii[i];j<ii[i+1];j++) { 268 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 269 } 270 } 271 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 272 if (!order && ii[i+1] != ii[i]) { 273 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 274 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 275 } 276 } 277 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 278 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 279 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 280 /* identify splitpoints and corner candidates: TODO variable order */ 281 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 282 if (print) { 283 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 284 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 285 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 286 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 287 } 288 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 289 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 290 for (i=0;i<nv;i++) { 291 PetscInt ord = order, test = ii[i+1]-ii[i]; 292 if (!order) { 293 PetscReal vorder = 0.; 294 295 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 296 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 297 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 298 ord = 1; 299 } 300 #if defined(PETSC_USE_DEBUG) 301 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 302 #endif 303 if (test >= 3*ord) { /* splitpoints */ 304 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 305 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 306 } else if (test == ord) { 307 if (order == 1) { 308 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 309 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 310 } else { 311 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 312 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 313 } 314 } 315 } 316 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 317 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 318 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 319 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 320 321 /* Get the local G^T explicitly */ 322 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 323 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 324 if (print) { 325 ierr = PetscObjectSetName((PetscObject)lGt,"initial_lGt");CHKERRQ(ierr); 326 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 327 } 328 329 /* Mark interior nodal dofs */ 330 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 331 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 332 for (i=1;i<n_neigh;i++) { 333 for (j=0;j<n_shared[i];j++) { 334 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 335 } 336 } 337 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 338 339 /* communicate corners and splitpoints */ 340 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 341 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 342 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 343 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 344 345 if (print) { 346 IS tbz; 347 348 cum = 0; 349 for (i=0;i<nv;i++) 350 if (sfvleaves[i]) 351 vmarks[cum++] = i; 352 353 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 354 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 355 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 356 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 357 } 358 359 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 360 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 361 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 362 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 363 364 /* Zero rows of lGt corresponding to identified corners 365 and interior nodal dofs */ 366 cum = 0; 367 for (i=0;i<nv;i++) { 368 if (sfvleaves[i]) { 369 vmarks[cum++] = i; 370 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 371 } 372 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 373 } 374 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 375 if (print) { 376 IS tbz; 377 378 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 379 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 380 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 381 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 382 } 383 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 384 ierr = PetscFree(vmarks);CHKERRQ(ierr); 385 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 386 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 387 388 /* Recompute G */ 389 ierr = MatDestroy(&lG);CHKERRQ(ierr); 390 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 391 if (print) { 392 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 393 ierr = MatView(lG,NULL);CHKERRQ(ierr); 394 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 395 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 396 } 397 398 /* Get primal dofs (if any) */ 399 cum = 0; 400 for (i=0;i<ne;i++) { 401 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 402 } 403 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 404 if (print) { 405 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 406 ierr = ISView(primals,NULL);CHKERRQ(ierr); 407 } 408 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 409 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 410 ierr = ISDestroy(&primals);CHKERRQ(ierr); 411 412 /* Compute edge connectivity */ 413 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 414 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 415 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 416 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 417 ierr = PCBDDCSetLocalAdjacencyGraph(pc,ne,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 418 } 419 /* Analyze interface for edge dofs */ 420 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 421 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 422 423 /* Get coarse edges in the edge space */ 424 pcbddc->mat_graph->twodim = PETSC_FALSE; 425 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 426 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 427 428 /* Mark fine edge dofs with their coarse edge id */ 429 maxsize = 0; 430 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 431 for (i=0;i<nee;i++) { 432 PetscInt size,mark = i+1; 433 434 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 435 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 436 for (j=0;j<size;j++) marks[idxs[j]] = mark; 437 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 438 maxsize = PetscMax(maxsize,size); 439 } 440 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 441 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 442 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 443 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 444 if (print) { 445 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 446 ierr = ISView(primals,NULL);CHKERRQ(ierr); 447 } 448 449 /* Find coarse edge endpoints */ 450 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 451 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 452 for (i=0;i<nee;i++) { 453 PetscInt mark = i+1,size; 454 455 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 456 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 457 if (print) { 458 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 459 ISView(eedges[i],NULL); 460 } 461 for (j=0;j<size;j++) { 462 PetscInt k, ee = idxs[j]; 463 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 464 for (k=ii[ee];k<ii[ee+1];k++) { 465 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 466 if (PetscBTLookup(btv,jj[k])) { 467 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 468 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 469 if (print) PetscPrintf(PETSC_COMM_SELF," candidate? %d\n",(PetscBool)(PetscBTLookup(btvcand,jj[k]))); 470 PetscInt k2; 471 PetscBool corner = PETSC_FALSE; 472 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 473 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: count %d, mark %d (ref mark %d), special %d\n",jjt[k2],pcbddc->mat_graph->count[jjt[k2]],marks[jjt[k2]],mark,pcbddc->mat_graph->special_dof[jjt[k2]]); 474 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || 475 pcbddc->mat_graph->special_dof[jjt[k2]] == PCBDDCGRAPH_DIRICHLET_MARK) { 476 corner = PETSC_TRUE; 477 break; 478 } 479 } 480 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 481 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 482 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 483 } else { 484 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 485 } 486 } 487 } 488 } 489 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 490 } 491 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 492 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 493 494 /* Reset marked primal dofs */ 495 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 496 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 497 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 498 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 499 500 /* Compute extended cols indices */ 501 ierr = MatGetRowIJ(lG ,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 502 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 503 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 504 i *= maxsize; 505 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 506 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 507 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 508 eerr = PETSC_FALSE; 509 for (i=0;i<nee;i++) { 510 PetscInt size; 511 512 cum = 0; 513 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 514 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 515 for (j=0;j<size;j++) { 516 PetscInt k,ee = idxs[j]; 517 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 518 } 519 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 520 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 521 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 522 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 523 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 524 /* it may happen that endpoints are not defined at this point 525 if it is the case, mark this edge for a second pass */ 526 if (cum != size -1) { 527 emarks[i] = 1; 528 if (print) { 529 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 530 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 531 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 532 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 533 } 534 eerr = PETSC_TRUE; 535 } 536 } 537 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 538 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 539 if (done) { 540 PetscInt *newprimals; 541 542 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 543 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 544 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 545 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 546 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 547 for (i=0;i<nee;i++) { 548 if (emarks[i]) { 549 PetscInt size,mark = i+1; 550 551 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 552 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 553 for (j=0;j<size;j++) { 554 /* newprimals[cum++] = idxs[j]; */ 555 PetscInt k,ee = idxs[j]; 556 for (k=ii[ee];k<ii[ee+1];k++) { 557 /* set all candidates located on the edge as corners */ 558 if (PetscBTLookup(btvcand,jj[k])) { 559 PetscInt k2,vv = jj[k]; 560 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 561 /* set all edge dofs connected to candidate as primals */ 562 for (k2=iit[vv];k2<iit[vv+1];k2++) { 563 if (marks[jjt[k2]] == mark) { 564 PetscInt k3,ee2 = jjt[k2]; 565 newprimals[cum++] = ee2; 566 /* finally set the new corners */ 567 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 568 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 569 } 570 } 571 } 572 } 573 } 574 } 575 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 576 } 577 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 578 } 579 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 580 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 581 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 582 ierr = PetscFree(newprimals);CHKERRQ(ierr); 583 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 584 ierr = ISDestroy(&primals);CHKERRQ(ierr); 585 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 586 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 587 588 /* Mark again */ 589 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 590 for (i=0;i<nee;i++) { 591 PetscInt size,mark = i+1; 592 593 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 594 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 595 for (j=0;j<size;j++) marks[idxs[j]] = mark; 596 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 597 } 598 if (print) { 599 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 600 ierr = ISView(primals,NULL);CHKERRQ(ierr); 601 } 602 603 /* Recompute extended cols */ 604 eerr = PETSC_FALSE; 605 for (i=0;i<nee;i++) { 606 PetscInt size; 607 608 cum = 0; 609 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 610 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 611 for (j=0;j<size;j++) { 612 PetscInt k,ee = idxs[j]; 613 for (k=ii[ee];k<ii[ee+1];k++) { 614 if (!PetscBTLookup(btv,jj[k])) { 615 extrow[cum++] = jj[k]; 616 } 617 } 618 } 619 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 620 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 621 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 622 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 623 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 624 if (cum != size -1) { 625 if (print) { 626 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 627 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 628 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 629 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 630 } 631 eerr = PETSC_TRUE; 632 } 633 } 634 } 635 ierr = MatRestoreRowIJ( lG,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 636 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 637 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 638 ierr = PetscFree(emarks);CHKERRQ(ierr); 639 /* an error should not occur at this point */ 640 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 641 642 /* Check the number of endpoints */ 643 /* TODO: add case for circular edge */ 644 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 645 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 646 for (i=0;i<nee;i++) { 647 PetscInt size, found = 0; 648 649 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 650 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 651 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 652 for (j=0;j<size;j++) { 653 PetscInt k,ee = idxs[j]; 654 for (k=ii[ee];k<ii[ee+1];k++) { 655 PetscInt vv = jj[k]; 656 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 657 found++; 658 } 659 } 660 } 661 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 662 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 663 } 664 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 665 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 666 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 667 668 #if defined(PETSC_USE_DEBUG) 669 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 670 not interfere with neighbouring coarse edges */ 671 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 672 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 673 for (i=0;i<nv;i++) { 674 PetscInt emax = 0,eemax = 0; 675 676 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 677 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 678 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 679 for (j=1;j<nee+1;j++) { 680 if (emax < emarks[j]) { 681 emax = emarks[j]; 682 eemax = j; 683 } 684 } 685 /* not relevant for edges */ 686 if (!eemax) continue; 687 688 for (j=ii[i];j<ii[i+1];j++) { 689 if (marks[jj[j]] && marks[jj[j]] != eemax) { 690 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dod %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 691 } 692 } 693 } 694 ierr = PetscFree(emarks);CHKERRQ(ierr); 695 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 696 #endif 697 698 /* Compute extended rows indices for edge blocks of the change of basis */ 699 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 700 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 701 extmem *= maxsize; 702 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 703 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 704 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 705 for (i=0;i<nv;i++) { 706 PetscInt mark = 0,size,start; 707 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 708 for (j=ii[i];j<ii[i+1];j++) 709 if (marks[jj[j]] && !mark) 710 mark = marks[jj[j]]; 711 712 /* not relevant */ 713 if (!mark) continue; 714 715 /* import extended row */ 716 mark--; 717 start = mark*extmem+extrowcum[mark]; 718 size = ii[i+1]-ii[i]; 719 #if defined(PETSC_USE_DEBUG) 720 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 721 #endif 722 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 723 extrowcum[mark] += size; 724 } 725 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 726 cum = 0; 727 for (i=0;i<nee;i++) { 728 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 729 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 731 cum = PetscMax(cum,size); 732 } 733 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 734 ierr = PetscFree(marks);CHKERRQ(ierr); 735 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 736 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 737 738 /* Workspace for lapack inner calls and VecSetValues */ 739 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 740 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 741 for (i=0;i<maxsize;i++) vals[i] = 1.; 742 743 /* Create vectors for quadrature rules */ 744 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 745 for (i=0;i<nquads;i++) { 746 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 747 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 748 } 749 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 750 751 /* Create change of basis matrix (preallocation can be improved) */ 752 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 753 ierr = MatSetSizes(T,Le,Le,Ne,Ne);CHKERRQ(ierr); 754 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 755 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 756 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 757 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 758 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 759 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 760 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 761 762 /* Defaults to identity */ 763 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 764 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 765 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 766 767 for (i=0;i<nee;i++) { 768 Mat Gins = NULL, GKins = NULL; 769 770 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 771 if (Gins && GKins) { 772 PetscScalar *data; 773 const PetscInt *rows,*cols; 774 PetscInt nrh,nch,nrc,ncc; 775 776 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 777 /* H1 */ 778 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 779 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 780 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 781 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 782 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 783 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 784 /* complement */ 785 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 786 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 787 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 788 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 789 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 790 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 791 /* Gins kernel quadratures */ 792 for (j=0;j<ncc;j++) { 793 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 794 } 795 /* H1 average */ 796 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 797 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 798 } 799 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 800 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 801 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 802 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 803 } 804 805 /* Start assembling */ 806 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 for (i=0;i<nquads;i++) { 808 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 809 } 810 811 /* Free */ 812 ierr = PetscFree(extrow);CHKERRQ(ierr); 813 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 814 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 815 ierr = PetscFree(vals);CHKERRQ(ierr); 816 ierr = PetscFree(extrows);CHKERRQ(ierr); 817 ierr = PetscFree(extcols);CHKERRQ(ierr); 818 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 819 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 820 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 821 ierr = MatDestroy(&lG);CHKERRQ(ierr); 822 ierr = MatDestroy(&conn);CHKERRQ(ierr); 823 824 /* Complete assembling */ 825 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 826 for (i=0;i<nquads;i++) { 827 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 828 } 829 for (i=0;i<nquads;i++) { 830 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 831 } 832 ierr = PetscFree(quads);CHKERRQ(ierr); 833 834 /* tell PCBDDC the topography has been analyzed */ 835 pcbddc->recompute_topography = PETSC_FALSE; 836 837 /* set change of basis */ 838 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 839 ierr = MatDestroy(&T);CHKERRQ(ierr); 840 841 /* set quadratures */ 842 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 843 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 844 845 PetscFunctionReturn(0); 846 } 847 848 /* the near-null space of BDDC carries information on quadrature weights, 849 and these can be collinear -> so cheat with MatNullSpaceCreate 850 and create a suitable set of basis vectors first */ 851 #undef __FUNCT__ 852 #define __FUNCT__ "PCBDDCNullSpaceCreate" 853 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 854 { 855 PetscErrorCode ierr; 856 PetscInt i; 857 858 PetscFunctionBegin; 859 for (i=0;i<nvecs;i++) { 860 PetscInt first,last; 861 862 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 863 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 864 if (i>=first && i < last) { 865 PetscScalar *data; 866 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 867 if (!has_const) { 868 data[i-first] = 1.; 869 } else { 870 data[2*i-first] = 1./PetscSqrtReal(2.); 871 data[2*i-first+1] = -1./PetscSqrtReal(2.); 872 } 873 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 874 } 875 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 876 } 877 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 878 for (i=0;i<nvecs;i++) { /* reset vectors */ 879 PetscInt first,last; 880 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 881 if (i>=first && i < last) { 882 PetscScalar *data; 883 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 884 if (!has_const) { 885 data[i-first] = 0.; 886 } else { 887 data[2*i-first] = 0.; 888 data[2*i-first+1] = 0.; 889 } 890 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 891 } 892 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 893 } 894 PetscFunctionReturn(0); 895 } 896 897 #undef __FUNCT__ 898 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 899 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 900 { 901 Mat loc_divudotp; 902 Vec p,v,vins,quad_vec,*quad_vecs; 903 ISLocalToGlobalMapping map; 904 IS *faces,*edges; 905 PetscScalar *vals; 906 const PetscScalar *array; 907 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 908 PetscMPIInt rank; 909 PetscErrorCode ierr; 910 911 PetscFunctionBegin; 912 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 913 if (graph->twodim) { 914 lmaxneighs = 2; 915 } else { 916 lmaxneighs = 1; 917 for (i=0;i<ne;i++) { 918 const PetscInt *idxs; 919 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 920 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 921 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 922 } 923 lmaxneighs++; /* graph count does not include self */ 924 } 925 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 926 maxsize = 0; 927 for (i=0;i<ne;i++) { 928 PetscInt nn; 929 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 930 maxsize = PetscMax(maxsize,nn); 931 } 932 for (i=0;i<nf;i++) { 933 PetscInt nn; 934 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 935 maxsize = PetscMax(maxsize,nn); 936 } 937 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 938 /* create vectors to hold quadrature weights */ 939 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 940 if (!transpose) { 941 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 942 } else { 943 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 944 } 945 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 946 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 947 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 948 for (i=0;i<maxneighs;i++) { 949 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 950 } 951 952 /* compute local quad vec */ 953 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 954 if (!transpose) { 955 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 956 } else { 957 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 958 } 959 ierr = VecSet(p,1.);CHKERRQ(ierr); 960 if (!transpose) { 961 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 962 } else { 963 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 964 } 965 if (vl2l) { 966 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 967 } else { 968 vins = v; 969 } 970 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 971 ierr = VecDestroy(&p);CHKERRQ(ierr); 972 973 /* insert in global quadrature vecs */ 974 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 975 for (i=0;i<nf;i++) { 976 const PetscInt *idxs; 977 PetscInt idx,nn,j; 978 979 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 980 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 981 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 982 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 983 idx = -(idx+1); 984 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 985 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 986 } 987 for (i=0;i<ne;i++) { 988 const PetscInt *idxs; 989 PetscInt idx,nn,j; 990 991 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 992 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 993 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 994 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 995 idx = -(idx+1); 996 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 997 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 998 } 999 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1000 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1001 if (vl2l) { 1002 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1003 } 1004 ierr = VecDestroy(&v);CHKERRQ(ierr); 1005 ierr = PetscFree(vals);CHKERRQ(ierr); 1006 1007 /* assemble near null space */ 1008 for (i=0;i<maxneighs;i++) { 1009 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1010 } 1011 for (i=0;i<maxneighs;i++) { 1012 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1013 } 1014 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1021 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1022 { 1023 PetscErrorCode ierr; 1024 Vec local,global; 1025 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1026 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1027 1028 PetscFunctionBegin; 1029 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1030 /* need to convert from global to local topology information and remove references to information in global ordering */ 1031 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1032 if (pcbddc->user_provided_isfordofs) { 1033 if (pcbddc->n_ISForDofs) { 1034 PetscInt i; 1035 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1036 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1037 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1038 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1039 } 1040 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1041 pcbddc->n_ISForDofs = 0; 1042 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1043 } 1044 } else { 1045 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1046 PetscInt i, n = matis->A->rmap->n; 1047 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1048 if (i > 1) { 1049 pcbddc->n_ISForDofsLocal = i; 1050 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1051 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1052 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1053 } 1054 } 1055 } 1056 } 1057 1058 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1059 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1060 } 1061 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1062 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1063 } 1064 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1065 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1066 } 1067 ierr = VecDestroy(&global);CHKERRQ(ierr); 1068 ierr = VecDestroy(&local);CHKERRQ(ierr); 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1074 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1075 { 1076 PC_IS *pcis = (PC_IS*)(pc->data); 1077 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1078 PetscErrorCode ierr; 1079 1080 PetscFunctionBegin; 1081 if (!pcbddc->benign_have_null) { 1082 PetscFunctionReturn(0); 1083 } 1084 if (pcbddc->ChangeOfBasisMatrix) { 1085 Vec swap; 1086 1087 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1088 swap = pcbddc->work_change; 1089 pcbddc->work_change = r; 1090 r = swap; 1091 } 1092 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1093 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1094 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1095 ierr = VecSet(z,0.);CHKERRQ(ierr); 1096 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1097 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1098 if (pcbddc->ChangeOfBasisMatrix) { 1099 Vec swap; 1100 1101 swap = r; 1102 r = pcbddc->work_change; 1103 pcbddc->work_change = swap; 1104 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1105 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1106 } 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1112 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1113 { 1114 PCBDDCBenignMatMult_ctx ctx; 1115 PetscErrorCode ierr; 1116 PetscBool apply_right,apply_left,reset_x; 1117 1118 PetscFunctionBegin; 1119 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1120 if (transpose) { 1121 apply_right = ctx->apply_left; 1122 apply_left = ctx->apply_right; 1123 } else { 1124 apply_right = ctx->apply_right; 1125 apply_left = ctx->apply_left; 1126 } 1127 reset_x = PETSC_FALSE; 1128 if (apply_right) { 1129 const PetscScalar *ax; 1130 PetscInt nl,i; 1131 1132 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1133 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1134 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1135 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1136 for (i=0;i<ctx->benign_n;i++) { 1137 PetscScalar sum,val; 1138 const PetscInt *idxs; 1139 PetscInt nz,j; 1140 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1141 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1142 sum = 0.; 1143 if (ctx->apply_p0) { 1144 val = ctx->work[idxs[nz-1]]; 1145 for (j=0;j<nz-1;j++) { 1146 sum += ctx->work[idxs[j]]; 1147 ctx->work[idxs[j]] += val; 1148 } 1149 } else { 1150 for (j=0;j<nz-1;j++) { 1151 sum += ctx->work[idxs[j]]; 1152 } 1153 } 1154 ctx->work[idxs[nz-1]] -= sum; 1155 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1156 } 1157 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1158 reset_x = PETSC_TRUE; 1159 } 1160 if (transpose) { 1161 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1162 } else { 1163 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1164 } 1165 if (reset_x) { 1166 ierr = VecResetArray(x);CHKERRQ(ierr); 1167 } 1168 if (apply_left) { 1169 PetscScalar *ay; 1170 PetscInt i; 1171 1172 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1173 for (i=0;i<ctx->benign_n;i++) { 1174 PetscScalar sum,val; 1175 const PetscInt *idxs; 1176 PetscInt nz,j; 1177 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1178 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1179 val = -ay[idxs[nz-1]]; 1180 if (ctx->apply_p0) { 1181 sum = 0.; 1182 for (j=0;j<nz-1;j++) { 1183 sum += ay[idxs[j]]; 1184 ay[idxs[j]] += val; 1185 } 1186 ay[idxs[nz-1]] += sum; 1187 } else { 1188 for (j=0;j<nz-1;j++) { 1189 ay[idxs[j]] += val; 1190 } 1191 ay[idxs[nz-1]] = 0.; 1192 } 1193 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1194 } 1195 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1196 } 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1202 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1203 { 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1213 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1214 { 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "PCBDDCBenignShellMat" 1224 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1225 { 1226 PC_IS *pcis = (PC_IS*)pc->data; 1227 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1228 PCBDDCBenignMatMult_ctx ctx; 1229 PetscErrorCode ierr; 1230 1231 PetscFunctionBegin; 1232 if (!restore) { 1233 Mat A_IB,A_BI; 1234 PetscScalar *work; 1235 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1236 1237 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1238 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1239 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1240 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1241 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1242 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1243 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1244 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1245 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1246 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1247 ctx->apply_left = PETSC_TRUE; 1248 ctx->apply_right = PETSC_FALSE; 1249 ctx->apply_p0 = PETSC_FALSE; 1250 ctx->benign_n = pcbddc->benign_n; 1251 if (reuse) { 1252 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1253 ctx->free = PETSC_FALSE; 1254 } else { /* TODO: could be optimized for successive solves */ 1255 ISLocalToGlobalMapping N_to_D; 1256 PetscInt i; 1257 1258 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1259 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1260 for (i=0;i<pcbddc->benign_n;i++) { 1261 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1262 } 1263 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1264 ctx->free = PETSC_TRUE; 1265 } 1266 ctx->A = pcis->A_IB; 1267 ctx->work = work; 1268 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1269 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1270 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1271 pcis->A_IB = A_IB; 1272 1273 /* A_BI as A_IB^T */ 1274 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1275 pcbddc->benign_original_mat = pcis->A_BI; 1276 pcis->A_BI = A_BI; 1277 } else { 1278 if (!pcbddc->benign_original_mat) { 1279 PetscFunctionReturn(0); 1280 } 1281 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1282 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1283 pcis->A_IB = ctx->A; 1284 ctx->A = NULL; 1285 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1286 pcis->A_BI = pcbddc->benign_original_mat; 1287 pcbddc->benign_original_mat = NULL; 1288 if (ctx->free) { 1289 PetscInt i; 1290 for (i=0;i<ctx->benign_n;i++) { 1291 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1292 } 1293 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1294 } 1295 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1296 ierr = PetscFree(ctx);CHKERRQ(ierr); 1297 } 1298 PetscFunctionReturn(0); 1299 } 1300 1301 /* used just in bddc debug mode */ 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "PCBDDCBenignProject" 1304 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1305 { 1306 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1307 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1308 Mat An; 1309 PetscErrorCode ierr; 1310 1311 PetscFunctionBegin; 1312 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1313 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1314 if (is1) { 1315 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1316 ierr = MatDestroy(&An);CHKERRQ(ierr); 1317 } else { 1318 *B = An; 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 /* TODO: add reuse flag */ 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatSeqAIJCompress" 1326 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1327 { 1328 Mat Bt; 1329 PetscScalar *a,*bdata; 1330 const PetscInt *ii,*ij; 1331 PetscInt m,n,i,nnz,*bii,*bij; 1332 PetscBool flg_row; 1333 PetscErrorCode ierr; 1334 1335 PetscFunctionBegin; 1336 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1337 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1338 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1339 nnz = n; 1340 for (i=0;i<ii[n];i++) { 1341 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1342 } 1343 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1344 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1345 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1346 nnz = 0; 1347 bii[0] = 0; 1348 for (i=0;i<n;i++) { 1349 PetscInt j; 1350 for (j=ii[i];j<ii[i+1];j++) { 1351 PetscScalar entry = a[j]; 1352 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1353 bij[nnz] = ij[j]; 1354 bdata[nnz] = entry; 1355 nnz++; 1356 } 1357 } 1358 bii[i+1] = nnz; 1359 } 1360 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1361 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1362 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1363 { 1364 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1365 b->free_a = PETSC_TRUE; 1366 b->free_ij = PETSC_TRUE; 1367 } 1368 *B = Bt; 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatDetectDisconnectedComponents" 1374 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1375 { 1376 Mat B; 1377 IS is_dummy,*cc_n; 1378 ISLocalToGlobalMapping l2gmap_dummy; 1379 PCBDDCGraph graph; 1380 PetscInt i,n; 1381 PetscInt *xadj,*adjncy; 1382 PetscInt *xadj_filtered,*adjncy_filtered; 1383 PetscBool flg_row,isseqaij; 1384 PetscErrorCode ierr; 1385 1386 PetscFunctionBegin; 1387 if (!A->rmap->N || !A->cmap->N) { 1388 *ncc = 0; 1389 *cc = NULL; 1390 PetscFunctionReturn(0); 1391 } 1392 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1393 if (!isseqaij && filter) { 1394 PetscBool isseqdense; 1395 1396 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1397 if (!isseqdense) { 1398 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1399 } else { /* TODO: rectangular case and LDA */ 1400 PetscScalar *array; 1401 PetscReal chop=1.e-6; 1402 1403 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1404 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1405 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1406 for (i=0;i<n;i++) { 1407 PetscInt j; 1408 for (j=i+1;j<n;j++) { 1409 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1410 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1411 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1412 } 1413 } 1414 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1415 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1416 } 1417 } else { 1418 B = A; 1419 } 1420 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1421 1422 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1423 if (filter) { 1424 PetscScalar *data; 1425 PetscInt j,cum; 1426 1427 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1428 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1429 cum = 0; 1430 for (i=0;i<n;i++) { 1431 PetscInt t; 1432 1433 for (j=xadj[i];j<xadj[i+1];j++) { 1434 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1435 continue; 1436 } 1437 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1438 } 1439 t = xadj_filtered[i]; 1440 xadj_filtered[i] = cum; 1441 cum += t; 1442 } 1443 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1444 } else { 1445 xadj_filtered = NULL; 1446 adjncy_filtered = NULL; 1447 } 1448 1449 /* compute local connected components using PCBDDCGraph */ 1450 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1451 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1452 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1453 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1454 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1455 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1456 if (xadj_filtered) { 1457 graph->xadj = xadj_filtered; 1458 graph->adjncy = adjncy_filtered; 1459 } else { 1460 graph->xadj = xadj; 1461 graph->adjncy = adjncy; 1462 } 1463 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1464 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1465 /* partial clean up */ 1466 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1467 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1468 if (A != B) { 1469 ierr = MatDestroy(&B);CHKERRQ(ierr); 1470 } 1471 1472 /* get back data */ 1473 if (ncc) *ncc = graph->ncc; 1474 if (cc) { 1475 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1476 for (i=0;i<graph->ncc;i++) { 1477 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1478 } 1479 *cc = cc_n; 1480 } 1481 /* clean up graph */ 1482 graph->xadj = 0; 1483 graph->adjncy = 0; 1484 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "PCBDDCBenignCheck" 1490 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1491 { 1492 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1493 PC_IS* pcis = (PC_IS*)(pc->data); 1494 IS dirIS = NULL; 1495 PetscInt i; 1496 PetscErrorCode ierr; 1497 1498 PetscFunctionBegin; 1499 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1500 if (zerodiag) { 1501 Mat A; 1502 Vec vec3_N; 1503 PetscScalar *vals; 1504 const PetscInt *idxs; 1505 PetscInt nz,*count; 1506 1507 /* p0 */ 1508 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1509 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1510 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1511 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1512 for (i=0;i<nz;i++) vals[i] = 1.; 1513 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1514 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1515 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1516 /* v_I */ 1517 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1518 for (i=0;i<nz;i++) vals[i] = 0.; 1519 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1520 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1521 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1522 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1523 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1524 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1525 if (dirIS) { 1526 PetscInt n; 1527 1528 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1529 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1530 for (i=0;i<n;i++) vals[i] = 0.; 1531 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1532 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1533 } 1534 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1535 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1536 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1537 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1538 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1539 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1540 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1541 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1542 ierr = PetscFree(vals);CHKERRQ(ierr); 1543 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1544 1545 /* there should not be any pressure dofs lying on the interface */ 1546 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1547 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1548 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1549 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1550 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1551 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1552 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1553 ierr = PetscFree(count);CHKERRQ(ierr); 1554 } 1555 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1556 1557 /* check PCBDDCBenignGetOrSetP0 */ 1558 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1559 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1560 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1561 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1562 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1563 for (i=0;i<pcbddc->benign_n;i++) { 1564 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1565 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1572 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1573 { 1574 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1575 IS pressures,zerodiag,*zerodiag_subs; 1576 PetscInt nz,n; 1577 PetscInt *interior_dofs,n_interior_dofs; 1578 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1579 PetscErrorCode ierr; 1580 1581 PetscFunctionBegin; 1582 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1583 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1584 for (n=0;n<pcbddc->benign_n;n++) { 1585 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1586 } 1587 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1588 pcbddc->benign_n = 0; 1589 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1590 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1591 Checks if all the pressure dofs in each subdomain have a zero diagonal 1592 If not, a change of basis on pressures is not needed 1593 since the local Schur complements are already SPD 1594 */ 1595 has_null_pressures = PETSC_TRUE; 1596 have_null = PETSC_TRUE; 1597 if (pcbddc->n_ISForDofsLocal) { 1598 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1599 1600 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1601 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1602 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1603 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1604 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1605 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1606 if (!sorted) { 1607 ierr = ISSort(pressures);CHKERRQ(ierr); 1608 } 1609 } else { 1610 pressures = NULL; 1611 } 1612 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1613 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1614 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1615 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1616 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1617 if (!sorted) { 1618 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1619 } 1620 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1621 if (!nz) { 1622 if (n) have_null = PETSC_FALSE; 1623 has_null_pressures = PETSC_FALSE; 1624 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1625 } 1626 recompute_zerodiag = PETSC_FALSE; 1627 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1628 zerodiag_subs = NULL; 1629 pcbddc->benign_n = 0; 1630 n_interior_dofs = 0; 1631 interior_dofs = NULL; 1632 if (pcbddc->current_level) { /* need to compute interior nodes */ 1633 PetscInt n,i,j; 1634 PetscInt n_neigh,*neigh,*n_shared,**shared; 1635 PetscInt *iwork; 1636 1637 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1638 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1639 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1640 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1641 for (i=1;i<n_neigh;i++) 1642 for (j=0;j<n_shared[i];j++) 1643 iwork[shared[i][j]] += 1; 1644 for (i=0;i<n;i++) 1645 if (!iwork[i]) 1646 interior_dofs[n_interior_dofs++] = i; 1647 ierr = PetscFree(iwork);CHKERRQ(ierr); 1648 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1649 } 1650 if (has_null_pressures) { 1651 IS *subs; 1652 PetscInt nsubs,i,j,nl; 1653 const PetscInt *idxs; 1654 PetscScalar *array; 1655 Vec *work; 1656 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1657 1658 subs = pcbddc->local_subs; 1659 nsubs = pcbddc->n_local_subs; 1660 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1661 if (pcbddc->current_level) { 1662 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1663 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1664 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1665 /* work[0] = 1_p */ 1666 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1667 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1668 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1669 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1670 /* work[0] = 1_v */ 1671 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1672 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1673 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1674 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1675 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1676 } 1677 if (nsubs > 1) { 1678 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1679 for (i=0;i<nsubs;i++) { 1680 ISLocalToGlobalMapping l2g; 1681 IS t_zerodiag_subs; 1682 PetscInt nl; 1683 1684 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1685 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1686 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1687 if (nl) { 1688 PetscBool valid = PETSC_TRUE; 1689 1690 if (pcbddc->current_level) { 1691 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1692 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1693 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1694 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1695 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1696 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1697 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1698 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1699 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1700 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1701 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1702 for (j=0;j<n_interior_dofs;j++) { 1703 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1704 valid = PETSC_FALSE; 1705 break; 1706 } 1707 } 1708 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1709 } 1710 if (valid && pcbddc->NeumannBoundariesLocal) { 1711 IS t_bc; 1712 PetscInt nzb; 1713 1714 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1715 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1716 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1717 if (nzb) valid = PETSC_FALSE; 1718 } 1719 if (valid && pressures) { 1720 IS t_pressure_subs; 1721 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1722 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1723 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1724 } 1725 if (valid) { 1726 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1727 pcbddc->benign_n++; 1728 } else { 1729 recompute_zerodiag = PETSC_TRUE; 1730 } 1731 } 1732 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1733 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1734 } 1735 } else { /* there's just one subdomain (or zero if they have not been detected */ 1736 PetscBool valid = PETSC_TRUE; 1737 1738 if (pcbddc->NeumannBoundariesLocal) { 1739 PetscInt nzb; 1740 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1741 if (nzb) valid = PETSC_FALSE; 1742 } 1743 if (valid && pressures) { 1744 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1745 } 1746 if (valid && pcbddc->current_level) { 1747 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1748 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1749 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1750 for (j=0;j<n_interior_dofs;j++) { 1751 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1752 valid = PETSC_FALSE; 1753 break; 1754 } 1755 } 1756 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1757 } 1758 if (valid) { 1759 pcbddc->benign_n = 1; 1760 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1761 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1762 zerodiag_subs[0] = zerodiag; 1763 } 1764 } 1765 if (pcbddc->current_level) { 1766 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1767 } 1768 } 1769 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1770 1771 if (!pcbddc->benign_n) { 1772 PetscInt n; 1773 1774 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1775 recompute_zerodiag = PETSC_FALSE; 1776 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1777 if (n) { 1778 has_null_pressures = PETSC_FALSE; 1779 have_null = PETSC_FALSE; 1780 } 1781 } 1782 1783 /* final check for null pressures */ 1784 if (zerodiag && pressures) { 1785 PetscInt nz,np; 1786 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1787 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 1788 if (nz != np) have_null = PETSC_FALSE; 1789 } 1790 1791 if (recompute_zerodiag) { 1792 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1793 if (pcbddc->benign_n == 1) { 1794 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 1795 zerodiag = zerodiag_subs[0]; 1796 } else { 1797 PetscInt i,nzn,*new_idxs; 1798 1799 nzn = 0; 1800 for (i=0;i<pcbddc->benign_n;i++) { 1801 PetscInt ns; 1802 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1803 nzn += ns; 1804 } 1805 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 1806 nzn = 0; 1807 for (i=0;i<pcbddc->benign_n;i++) { 1808 PetscInt ns,*idxs; 1809 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1810 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1811 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 1812 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1813 nzn += ns; 1814 } 1815 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 1816 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 1817 } 1818 have_null = PETSC_FALSE; 1819 } 1820 1821 /* Prepare matrix to compute no-net-flux */ 1822 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 1823 Mat A,loc_divudotp; 1824 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 1825 IS row,col,isused = NULL; 1826 PetscInt M,N,n,st,n_isused; 1827 1828 if (pressures) { 1829 isused = pressures; 1830 } else { 1831 isused = zerodiag; 1832 } 1833 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 1834 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1835 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 1836 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 1837 n_isused = 0; 1838 if (isused) { 1839 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 1840 } 1841 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1842 st = st-n_isused; 1843 if (n) { 1844 const PetscInt *gidxs; 1845 1846 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 1847 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1848 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 1849 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1850 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1851 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1852 } else { 1853 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 1854 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1855 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1856 } 1857 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 1858 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 1859 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 1860 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 1861 ierr = ISDestroy(&row);CHKERRQ(ierr); 1862 ierr = ISDestroy(&col);CHKERRQ(ierr); 1863 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 1864 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1865 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1866 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1867 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1868 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1869 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1870 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1871 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1872 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1873 } 1874 1875 /* change of basis and p0 dofs */ 1876 if (has_null_pressures) { 1877 IS zerodiagc; 1878 const PetscInt *idxs,*idxsc; 1879 PetscInt i,s,*nnz; 1880 1881 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1882 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1883 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1884 /* local change of basis for pressures */ 1885 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1886 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1887 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1888 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1889 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1890 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1891 for (i=0;i<pcbddc->benign_n;i++) { 1892 PetscInt nzs,j; 1893 1894 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1895 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1896 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1897 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1898 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1899 } 1900 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1901 ierr = PetscFree(nnz);CHKERRQ(ierr); 1902 /* set identity on velocities */ 1903 for (i=0;i<n-nz;i++) { 1904 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1905 } 1906 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1907 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1908 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1909 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1910 /* set change on pressures */ 1911 for (s=0;s<pcbddc->benign_n;s++) { 1912 PetscScalar *array; 1913 PetscInt nzs; 1914 1915 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1916 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1917 for (i=0;i<nzs-1;i++) { 1918 PetscScalar vals[2]; 1919 PetscInt cols[2]; 1920 1921 cols[0] = idxs[i]; 1922 cols[1] = idxs[nzs-1]; 1923 vals[0] = 1.; 1924 vals[1] = 1.; 1925 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1926 } 1927 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1928 for (i=0;i<nzs-1;i++) array[i] = -1.; 1929 array[nzs-1] = 1.; 1930 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1931 /* store local idxs for p0 */ 1932 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1933 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1934 ierr = PetscFree(array);CHKERRQ(ierr); 1935 } 1936 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1937 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1938 /* project if needed */ 1939 if (pcbddc->benign_change_explicit) { 1940 Mat M; 1941 1942 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1943 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1944 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1945 ierr = MatDestroy(&M);CHKERRQ(ierr); 1946 } 1947 /* store global idxs for p0 */ 1948 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1949 } 1950 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1951 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1952 1953 /* determines if the coarse solver will be singular or not */ 1954 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1955 /* determines if the problem has subdomains with 0 pressure block */ 1956 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1957 *zerodiaglocal = zerodiag; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1963 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1964 { 1965 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1966 PetscScalar *array; 1967 PetscErrorCode ierr; 1968 1969 PetscFunctionBegin; 1970 if (!pcbddc->benign_sf) { 1971 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1972 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1973 } 1974 if (get) { 1975 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1976 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1977 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1978 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1979 } else { 1980 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1981 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1982 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1983 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1984 } 1985 PetscFunctionReturn(0); 1986 } 1987 1988 #undef __FUNCT__ 1989 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1990 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1991 { 1992 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1993 PetscErrorCode ierr; 1994 1995 PetscFunctionBegin; 1996 /* TODO: add error checking 1997 - avoid nested pop (or push) calls. 1998 - cannot push before pop. 1999 - cannot call this if pcbddc->local_mat is NULL 2000 */ 2001 if (!pcbddc->benign_n) { 2002 PetscFunctionReturn(0); 2003 } 2004 if (pop) { 2005 if (pcbddc->benign_change_explicit) { 2006 IS is_p0; 2007 MatReuse reuse; 2008 2009 /* extract B_0 */ 2010 reuse = MAT_INITIAL_MATRIX; 2011 if (pcbddc->benign_B0) { 2012 reuse = MAT_REUSE_MATRIX; 2013 } 2014 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2015 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2016 /* remove rows and cols from local problem */ 2017 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2018 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2019 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2020 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2021 } else { 2022 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2023 PetscScalar *vals; 2024 PetscInt i,n,*idxs_ins; 2025 2026 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2027 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2028 if (!pcbddc->benign_B0) { 2029 PetscInt *nnz; 2030 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2031 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2032 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2033 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2034 for (i=0;i<pcbddc->benign_n;i++) { 2035 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2036 nnz[i] = n - nnz[i]; 2037 } 2038 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2039 ierr = PetscFree(nnz);CHKERRQ(ierr); 2040 } 2041 2042 for (i=0;i<pcbddc->benign_n;i++) { 2043 PetscScalar *array; 2044 PetscInt *idxs,j,nz,cum; 2045 2046 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2047 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2048 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2049 for (j=0;j<nz;j++) vals[j] = 1.; 2050 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2051 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2052 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2053 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2054 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2055 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2056 cum = 0; 2057 for (j=0;j<n;j++) { 2058 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2059 vals[cum] = array[j]; 2060 idxs_ins[cum] = j; 2061 cum++; 2062 } 2063 } 2064 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2065 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2066 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2067 } 2068 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2071 } 2072 } else { /* push */ 2073 if (pcbddc->benign_change_explicit) { 2074 PetscInt i; 2075 2076 for (i=0;i<pcbddc->benign_n;i++) { 2077 PetscScalar *B0_vals; 2078 PetscInt *B0_cols,B0_ncol; 2079 2080 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2081 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2082 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2083 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2084 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2085 } 2086 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 } else { 2089 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2090 } 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2097 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2098 { 2099 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2100 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2101 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2102 PetscBLASInt *B_iwork,*B_ifail; 2103 PetscScalar *work,lwork; 2104 PetscScalar *St,*S,*eigv; 2105 PetscScalar *Sarray,*Starray; 2106 PetscReal *eigs,thresh; 2107 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2108 PetscBool allocated_S_St; 2109 #if defined(PETSC_USE_COMPLEX) 2110 PetscReal *rwork; 2111 #endif 2112 PetscErrorCode ierr; 2113 2114 PetscFunctionBegin; 2115 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2116 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2117 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2118 2119 if (pcbddc->dbg_flag) { 2120 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2121 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2122 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2123 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2124 } 2125 2126 if (pcbddc->dbg_flag) { 2127 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2128 } 2129 2130 /* max size of subsets */ 2131 mss = 0; 2132 for (i=0;i<sub_schurs->n_subs;i++) { 2133 PetscInt subset_size; 2134 2135 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2136 mss = PetscMax(mss,subset_size); 2137 } 2138 2139 /* min/max and threshold */ 2140 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2141 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2142 nmax = PetscMax(nmin,nmax); 2143 allocated_S_St = PETSC_FALSE; 2144 if (nmin) { 2145 allocated_S_St = PETSC_TRUE; 2146 } 2147 2148 /* allocate lapack workspace */ 2149 cum = cum2 = 0; 2150 maxneigs = 0; 2151 for (i=0;i<sub_schurs->n_subs;i++) { 2152 PetscInt n,subset_size; 2153 2154 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2155 n = PetscMin(subset_size,nmax); 2156 cum += subset_size; 2157 cum2 += subset_size*n; 2158 maxneigs = PetscMax(maxneigs,n); 2159 } 2160 if (mss) { 2161 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2162 PetscBLASInt B_itype = 1; 2163 PetscBLASInt B_N = mss; 2164 PetscReal zero = 0.0; 2165 PetscReal eps = 0.0; /* dlamch? */ 2166 2167 B_lwork = -1; 2168 S = NULL; 2169 St = NULL; 2170 eigs = NULL; 2171 eigv = NULL; 2172 B_iwork = NULL; 2173 B_ifail = NULL; 2174 #if defined(PETSC_USE_COMPLEX) 2175 rwork = NULL; 2176 #endif 2177 thresh = 1.0; 2178 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2179 #if defined(PETSC_USE_COMPLEX) 2180 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2181 #else 2182 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2183 #endif 2184 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2185 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2186 } else { 2187 /* TODO */ 2188 } 2189 } else { 2190 lwork = 0; 2191 } 2192 2193 nv = 0; 2194 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2195 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2196 } 2197 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2198 if (allocated_S_St) { 2199 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2200 } 2201 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2202 #if defined(PETSC_USE_COMPLEX) 2203 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2204 #endif 2205 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2206 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2207 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2208 nv+cum,&pcbddc->adaptive_constraints_idxs, 2209 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2210 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2211 2212 maxneigs = 0; 2213 cum = cumarray = 0; 2214 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2215 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2216 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2217 const PetscInt *idxs; 2218 2219 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2220 for (cum=0;cum<nv;cum++) { 2221 pcbddc->adaptive_constraints_n[cum] = 1; 2222 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2223 pcbddc->adaptive_constraints_data[cum] = 1.0; 2224 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2225 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2226 } 2227 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2228 } 2229 2230 if (mss) { /* multilevel */ 2231 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2232 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2233 } 2234 2235 thresh = pcbddc->adaptive_threshold; 2236 for (i=0;i<sub_schurs->n_subs;i++) { 2237 const PetscInt *idxs; 2238 PetscReal upper,lower; 2239 PetscInt j,subset_size,eigs_start = 0; 2240 PetscBLASInt B_N; 2241 PetscBool same_data = PETSC_FALSE; 2242 2243 if (pcbddc->use_deluxe_scaling) { 2244 upper = PETSC_MAX_REAL; 2245 lower = thresh; 2246 } else { 2247 upper = 1./thresh; 2248 lower = 0.; 2249 } 2250 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2251 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2252 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2253 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2254 if (sub_schurs->is_hermitian) { 2255 PetscInt j,k; 2256 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2257 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2258 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2259 } 2260 for (j=0;j<subset_size;j++) { 2261 for (k=j;k<subset_size;k++) { 2262 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2263 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2264 } 2265 } 2266 } else { 2267 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2268 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2269 } 2270 } else { 2271 S = Sarray + cumarray; 2272 St = Starray + cumarray; 2273 } 2274 /* see if we can save some work */ 2275 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2276 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2277 } 2278 2279 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2280 B_neigs = 0; 2281 } else { 2282 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2283 PetscBLASInt B_itype = 1; 2284 PetscBLASInt B_IL, B_IU; 2285 PetscReal eps = -1.0; /* dlamch? */ 2286 PetscInt nmin_s; 2287 PetscBool compute_range = PETSC_FALSE; 2288 2289 if (pcbddc->dbg_flag) { 2290 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2291 } 2292 2293 compute_range = PETSC_FALSE; 2294 if (thresh > 1.+PETSC_SMALL && !same_data) { 2295 compute_range = PETSC_TRUE; 2296 } 2297 2298 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2299 if (compute_range) { 2300 2301 /* ask for eigenvalues larger than thresh */ 2302 #if defined(PETSC_USE_COMPLEX) 2303 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2304 #else 2305 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2306 #endif 2307 } else if (!same_data) { 2308 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2309 B_IL = 1; 2310 #if defined(PETSC_USE_COMPLEX) 2311 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2312 #else 2313 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2314 #endif 2315 } else { /* same_data is true, so get the adaptive function requested by the user */ 2316 PetscInt k; 2317 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2318 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2319 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2320 nmin = nmax; 2321 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2322 for (k=0;k<nmax;k++) { 2323 eigs[k] = 1./PETSC_SMALL; 2324 eigv[k*(subset_size+1)] = 1.0; 2325 } 2326 } 2327 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2328 if (B_ierr) { 2329 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2330 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2331 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2332 } 2333 2334 if (B_neigs > nmax) { 2335 if (pcbddc->dbg_flag) { 2336 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2337 } 2338 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2339 B_neigs = nmax; 2340 } 2341 2342 nmin_s = PetscMin(nmin,B_N); 2343 if (B_neigs < nmin_s) { 2344 PetscBLASInt B_neigs2; 2345 2346 if (pcbddc->use_deluxe_scaling) { 2347 B_IL = B_N - nmin_s + 1; 2348 B_IU = B_N - B_neigs; 2349 } else { 2350 B_IL = B_neigs + 1; 2351 B_IU = nmin_s; 2352 } 2353 if (pcbddc->dbg_flag) { 2354 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2355 } 2356 if (sub_schurs->is_hermitian) { 2357 PetscInt j,k; 2358 for (j=0;j<subset_size;j++) { 2359 for (k=j;k<subset_size;k++) { 2360 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2361 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2362 } 2363 } 2364 } else { 2365 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2366 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2367 } 2368 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2369 #if defined(PETSC_USE_COMPLEX) 2370 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2371 #else 2372 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2373 #endif 2374 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2375 B_neigs += B_neigs2; 2376 } 2377 if (B_ierr) { 2378 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2379 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2380 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2381 } 2382 if (pcbddc->dbg_flag) { 2383 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2384 for (j=0;j<B_neigs;j++) { 2385 if (eigs[j] == 0.0) { 2386 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2387 } else { 2388 if (pcbddc->use_deluxe_scaling) { 2389 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2390 } else { 2391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2392 } 2393 } 2394 } 2395 } 2396 } else { 2397 /* TODO */ 2398 } 2399 } 2400 /* change the basis back to the original one */ 2401 if (sub_schurs->change) { 2402 Mat change,phi,phit; 2403 2404 if (pcbddc->dbg_flag > 1) { 2405 PetscInt ii; 2406 for (ii=0;ii<B_neigs;ii++) { 2407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2408 for (j=0;j<B_N;j++) { 2409 #if defined(PETSC_USE_COMPLEX) 2410 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2411 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2413 #else 2414 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2415 #endif 2416 } 2417 } 2418 } 2419 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2420 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2421 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2422 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2423 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2424 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2425 } 2426 maxneigs = PetscMax(B_neigs,maxneigs); 2427 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2428 if (B_neigs) { 2429 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2430 2431 if (pcbddc->dbg_flag > 1) { 2432 PetscInt ii; 2433 for (ii=0;ii<B_neigs;ii++) { 2434 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2435 for (j=0;j<B_N;j++) { 2436 #if defined(PETSC_USE_COMPLEX) 2437 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2438 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2440 #else 2441 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2442 #endif 2443 } 2444 } 2445 } 2446 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2447 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2448 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2449 cum++; 2450 } 2451 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2452 /* shift for next computation */ 2453 cumarray += subset_size*subset_size; 2454 } 2455 if (pcbddc->dbg_flag) { 2456 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2457 } 2458 2459 if (mss) { 2460 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2461 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2462 /* destroy matrices (junk) */ 2463 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2464 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2465 } 2466 if (allocated_S_St) { 2467 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2468 } 2469 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2470 #if defined(PETSC_USE_COMPLEX) 2471 ierr = PetscFree(rwork);CHKERRQ(ierr); 2472 #endif 2473 if (pcbddc->dbg_flag) { 2474 PetscInt maxneigs_r; 2475 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2476 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2477 } 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "PCBDDCSetUpSolvers" 2483 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2484 { 2485 PetscScalar *coarse_submat_vals; 2486 PetscErrorCode ierr; 2487 2488 PetscFunctionBegin; 2489 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2490 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2491 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2492 2493 /* Setup local neumann solver ksp_R */ 2494 /* PCBDDCSetUpLocalScatters should be called first! */ 2495 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2496 2497 /* 2498 Setup local correction and local part of coarse basis. 2499 Gives back the dense local part of the coarse matrix in column major ordering 2500 */ 2501 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2502 2503 /* Compute total number of coarse nodes and setup coarse solver */ 2504 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2505 2506 /* free */ 2507 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2508 PetscFunctionReturn(0); 2509 } 2510 2511 #undef __FUNCT__ 2512 #define __FUNCT__ "PCBDDCResetCustomization" 2513 PetscErrorCode PCBDDCResetCustomization(PC pc) 2514 { 2515 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2520 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2521 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2522 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2523 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2524 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2525 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2526 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2527 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2528 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2529 PetscFunctionReturn(0); 2530 } 2531 2532 #undef __FUNCT__ 2533 #define __FUNCT__ "PCBDDCResetTopography" 2534 PetscErrorCode PCBDDCResetTopography(PC pc) 2535 { 2536 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2537 PetscInt i; 2538 PetscErrorCode ierr; 2539 2540 PetscFunctionBegin; 2541 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2542 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2543 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2544 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2545 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2546 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2547 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2548 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2549 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2550 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2551 pcbddc->graphanalyzed = PETSC_FALSE; 2552 for (i=0;i<pcbddc->n_local_subs;i++) { 2553 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2554 } 2555 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2556 if (pcbddc->sub_schurs) { 2557 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2558 } 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "PCBDDCResetSolvers" 2564 PetscErrorCode PCBDDCResetSolvers(PC pc) 2565 { 2566 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2567 PetscErrorCode ierr; 2568 2569 PetscFunctionBegin; 2570 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2571 if (pcbddc->coarse_phi_B) { 2572 PetscScalar *array; 2573 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2574 ierr = PetscFree(array);CHKERRQ(ierr); 2575 } 2576 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2577 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2578 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2579 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2580 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2581 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2582 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2583 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2584 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2585 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2586 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2587 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2588 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2589 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2590 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2591 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2592 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2593 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2594 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2595 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2596 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2597 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2598 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2599 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2600 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2601 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2602 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2603 if (pcbddc->benign_zerodiag_subs) { 2604 PetscInt i; 2605 for (i=0;i<pcbddc->benign_n;i++) { 2606 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2607 } 2608 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2609 } 2610 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2611 PetscFunctionReturn(0); 2612 } 2613 2614 #undef __FUNCT__ 2615 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2616 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2617 { 2618 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2619 PC_IS *pcis = (PC_IS*)pc->data; 2620 VecType impVecType; 2621 PetscInt n_constraints,n_R,old_size; 2622 PetscErrorCode ierr; 2623 2624 PetscFunctionBegin; 2625 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2626 /* get sizes */ 2627 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2628 n_R = pcis->n - pcbddc->n_vertices; 2629 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2630 /* local work vectors (try to avoid unneeded work)*/ 2631 /* R nodes */ 2632 old_size = -1; 2633 if (pcbddc->vec1_R) { 2634 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2635 } 2636 if (n_R != old_size) { 2637 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2638 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2639 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2640 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2641 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2642 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2643 } 2644 /* local primal dofs */ 2645 old_size = -1; 2646 if (pcbddc->vec1_P) { 2647 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2648 } 2649 if (pcbddc->local_primal_size != old_size) { 2650 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2651 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2652 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2653 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2654 } 2655 /* local explicit constraints */ 2656 old_size = -1; 2657 if (pcbddc->vec1_C) { 2658 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2659 } 2660 if (n_constraints && n_constraints != old_size) { 2661 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2662 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2663 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2664 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2665 } 2666 PetscFunctionReturn(0); 2667 } 2668 2669 #undef __FUNCT__ 2670 #define __FUNCT__ "PCBDDCSetUpCorrection" 2671 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2672 { 2673 PetscErrorCode ierr; 2674 /* pointers to pcis and pcbddc */ 2675 PC_IS* pcis = (PC_IS*)pc->data; 2676 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2677 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2678 /* submatrices of local problem */ 2679 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2680 /* submatrices of local coarse problem */ 2681 Mat S_VV,S_CV,S_VC,S_CC; 2682 /* working matrices */ 2683 Mat C_CR; 2684 /* additional working stuff */ 2685 PC pc_R; 2686 Mat F; 2687 Vec dummy_vec; 2688 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2689 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2690 PetscScalar *work; 2691 PetscInt *idx_V_B; 2692 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2693 PetscInt i,n_R,n_D,n_B; 2694 2695 /* some shortcuts to scalars */ 2696 PetscScalar one=1.0,m_one=-1.0; 2697 2698 PetscFunctionBegin; 2699 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2700 2701 /* Set Non-overlapping dimensions */ 2702 n_vertices = pcbddc->n_vertices; 2703 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2704 n_B = pcis->n_B; 2705 n_D = pcis->n - n_B; 2706 n_R = pcis->n - n_vertices; 2707 2708 /* vertices in boundary numbering */ 2709 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2710 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2711 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2712 2713 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2714 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2716 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2717 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2718 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2719 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2720 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2721 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2722 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2723 2724 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2725 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2726 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2727 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2728 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2729 lda_rhs = n_R; 2730 need_benign_correction = PETSC_FALSE; 2731 if (isLU || isILU || isCHOL) { 2732 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2733 } else if (sub_schurs && sub_schurs->reuse_solver) { 2734 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2735 MatFactorType type; 2736 2737 F = reuse_solver->F; 2738 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2739 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2740 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2741 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2742 } else { 2743 F = NULL; 2744 } 2745 2746 /* allocate workspace */ 2747 n = 0; 2748 if (n_constraints) { 2749 n += lda_rhs*n_constraints; 2750 } 2751 if (n_vertices) { 2752 n = PetscMax(2*lda_rhs*n_vertices,n); 2753 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2754 } 2755 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2756 2757 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2758 dummy_vec = NULL; 2759 if (need_benign_correction && lda_rhs != n_R && F) { 2760 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2761 } 2762 2763 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2764 if (n_constraints) { 2765 Mat M1,M2,M3,C_B; 2766 IS is_aux; 2767 PetscScalar *array,*array2; 2768 2769 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2770 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2771 2772 /* Extract constraints on R nodes: C_{CR} */ 2773 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2774 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2775 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2776 2777 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2778 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2779 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2780 for (i=0;i<n_constraints;i++) { 2781 const PetscScalar *row_cmat_values; 2782 const PetscInt *row_cmat_indices; 2783 PetscInt size_of_constraint,j; 2784 2785 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2786 for (j=0;j<size_of_constraint;j++) { 2787 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 2788 } 2789 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2790 } 2791 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 2792 if (F) { 2793 Mat B; 2794 2795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2796 if (need_benign_correction) { 2797 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2798 2799 /* rhs is already zero on interior dofs, no need to change the rhs */ 2800 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 2801 } 2802 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 2803 if (need_benign_correction) { 2804 PetscScalar *marr; 2805 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2806 2807 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2808 if (lda_rhs != n_R) { 2809 for (i=0;i<n_constraints;i++) { 2810 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2811 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2812 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2813 } 2814 } else { 2815 for (i=0;i<n_constraints;i++) { 2816 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2817 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2818 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2819 } 2820 } 2821 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2822 } 2823 ierr = MatDestroy(&B);CHKERRQ(ierr); 2824 } else { 2825 PetscScalar *marr; 2826 2827 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2828 for (i=0;i<n_constraints;i++) { 2829 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2830 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 2831 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2832 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2833 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2834 } 2835 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2836 } 2837 if (!pcbddc->switch_static) { 2838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2839 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2840 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2841 for (i=0;i<n_constraints;i++) { 2842 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 2843 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 2844 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2845 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2846 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2847 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2848 } 2849 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2850 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2851 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2852 } else { 2853 if (lda_rhs != n_R) { 2854 IS dummy; 2855 2856 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 2857 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2858 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 2859 } else { 2860 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 2861 pcbddc->local_auxmat2 = local_auxmat2_R; 2862 } 2863 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2864 } 2865 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2866 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2867 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 2868 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 2869 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 2870 if (isCHOL) { 2871 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2872 } else { 2873 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2874 } 2875 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2876 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2877 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2878 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2879 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2880 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2881 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2882 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2883 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2884 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2885 } 2886 2887 /* Get submatrices from subdomain matrix */ 2888 if (n_vertices) { 2889 IS is_aux; 2890 2891 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2892 IS tis; 2893 2894 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2895 ierr = ISSort(tis);CHKERRQ(ierr); 2896 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2897 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2898 } else { 2899 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2900 } 2901 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2902 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2903 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2904 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2905 } 2906 2907 /* Matrix of coarse basis functions (local) */ 2908 if (pcbddc->coarse_phi_B) { 2909 PetscInt on_B,on_primal,on_D=n_D; 2910 if (pcbddc->coarse_phi_D) { 2911 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2912 } 2913 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2914 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2915 PetscScalar *marray; 2916 2917 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2918 ierr = PetscFree(marray);CHKERRQ(ierr); 2919 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2920 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2921 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2922 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2923 } 2924 } 2925 2926 if (!pcbddc->coarse_phi_B) { 2927 PetscScalar *marray; 2928 2929 n = n_B*pcbddc->local_primal_size; 2930 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2931 n += n_D*pcbddc->local_primal_size; 2932 } 2933 if (!pcbddc->symmetric_primal) { 2934 n *= 2; 2935 } 2936 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2937 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2938 n = n_B*pcbddc->local_primal_size; 2939 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2940 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2941 n += n_D*pcbddc->local_primal_size; 2942 } 2943 if (!pcbddc->symmetric_primal) { 2944 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2945 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2946 n = n_B*pcbddc->local_primal_size; 2947 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2948 } 2949 } else { 2950 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2951 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2952 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2953 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2954 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2955 } 2956 } 2957 } 2958 2959 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2960 p0_lidx_I = NULL; 2961 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2962 const PetscInt *idxs; 2963 2964 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2965 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2966 for (i=0;i<pcbddc->benign_n;i++) { 2967 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2968 } 2969 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2970 } 2971 2972 /* vertices */ 2973 if (n_vertices) { 2974 2975 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2976 2977 if (n_R) { 2978 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2979 PetscBLASInt B_N,B_one = 1; 2980 PetscScalar *x,*y; 2981 PetscBool isseqaij; 2982 2983 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2984 if (need_benign_correction) { 2985 ISLocalToGlobalMapping RtoN; 2986 IS is_p0; 2987 PetscInt *idxs_p0,n; 2988 2989 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2990 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2991 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2992 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2993 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2994 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2995 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2996 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2997 } 2998 2999 if (lda_rhs == n_R) { 3000 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3001 } else { 3002 PetscScalar *av,*array; 3003 const PetscInt *xadj,*adjncy; 3004 PetscInt n; 3005 PetscBool flg_row; 3006 3007 array = work+lda_rhs*n_vertices; 3008 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3009 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3010 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3011 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3012 for (i=0;i<n;i++) { 3013 PetscInt j; 3014 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3015 } 3016 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3017 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3018 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3019 } 3020 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3021 if (need_benign_correction) { 3022 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3023 PetscScalar *marr; 3024 3025 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3026 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3027 3028 | 0 0 0 | (V) 3029 L = | 0 0 -1 | (P-p0) 3030 | 0 0 -1 | (p0) 3031 3032 */ 3033 for (i=0;i<reuse_solver->benign_n;i++) { 3034 const PetscScalar *vals; 3035 const PetscInt *idxs,*idxs_zero; 3036 PetscInt n,j,nz; 3037 3038 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3039 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3040 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3041 for (j=0;j<n;j++) { 3042 PetscScalar val = vals[j]; 3043 PetscInt k,col = idxs[j]; 3044 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3045 } 3046 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3047 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3048 } 3049 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3050 } 3051 if (F) { 3052 /* need to correct the rhs */ 3053 if (need_benign_correction) { 3054 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3055 PetscScalar *marr; 3056 3057 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3058 if (lda_rhs != n_R) { 3059 for (i=0;i<n_vertices;i++) { 3060 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3061 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3062 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3063 } 3064 } else { 3065 for (i=0;i<n_vertices;i++) { 3066 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3067 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3068 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3069 } 3070 } 3071 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3072 } 3073 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3074 /* need to correct the solution */ 3075 if (need_benign_correction) { 3076 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3077 PetscScalar *marr; 3078 3079 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3080 if (lda_rhs != n_R) { 3081 for (i=0;i<n_vertices;i++) { 3082 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3083 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3084 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3085 } 3086 } else { 3087 for (i=0;i<n_vertices;i++) { 3088 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3089 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3090 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3091 } 3092 } 3093 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3094 } 3095 } else { 3096 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3097 for (i=0;i<n_vertices;i++) { 3098 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3099 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3100 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3101 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3102 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3103 } 3104 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3105 } 3106 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3107 /* S_VV and S_CV */ 3108 if (n_constraints) { 3109 Mat B; 3110 3111 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3112 for (i=0;i<n_vertices;i++) { 3113 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3114 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3115 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3116 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3117 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3118 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3119 } 3120 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3121 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3122 ierr = MatDestroy(&B);CHKERRQ(ierr); 3123 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3124 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3125 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3126 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3127 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3128 ierr = MatDestroy(&B);CHKERRQ(ierr); 3129 } 3130 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3131 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3132 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3133 } 3134 if (lda_rhs != n_R) { 3135 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3136 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3137 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3138 } 3139 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3140 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3141 if (need_benign_correction) { 3142 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3143 PetscScalar *marr,*sums; 3144 3145 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3146 ierr = MatDenseGetArray(S_VVt,&marr); 3147 for (i=0;i<reuse_solver->benign_n;i++) { 3148 const PetscScalar *vals; 3149 const PetscInt *idxs,*idxs_zero; 3150 PetscInt n,j,nz; 3151 3152 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3153 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3154 for (j=0;j<n_vertices;j++) { 3155 PetscInt k; 3156 sums[j] = 0.; 3157 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3158 } 3159 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3160 for (j=0;j<n;j++) { 3161 PetscScalar val = vals[j]; 3162 PetscInt k; 3163 for (k=0;k<n_vertices;k++) { 3164 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3165 } 3166 } 3167 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3168 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3169 } 3170 ierr = PetscFree(sums);CHKERRQ(ierr); 3171 ierr = MatDenseRestoreArray(S_VVt,&marr); 3172 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3173 } 3174 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3175 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3176 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3177 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3178 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3179 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3180 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3181 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3182 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3183 } else { 3184 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3185 } 3186 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3187 3188 /* coarse basis functions */ 3189 for (i=0;i<n_vertices;i++) { 3190 PetscScalar *y; 3191 3192 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3193 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3194 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3195 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3196 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3197 y[n_B*i+idx_V_B[i]] = 1.0; 3198 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3199 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3200 3201 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3202 PetscInt j; 3203 3204 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3205 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3206 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3207 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3208 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3209 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3210 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3211 } 3212 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3213 } 3214 /* if n_R == 0 the object is not destroyed */ 3215 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3216 } 3217 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3218 3219 if (n_constraints) { 3220 Mat B; 3221 3222 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3223 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3224 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3225 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3226 if (n_vertices) { 3227 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3228 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3229 } else { 3230 Mat S_VCt; 3231 3232 if (lda_rhs != n_R) { 3233 ierr = MatDestroy(&B);CHKERRQ(ierr); 3234 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3235 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3236 } 3237 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3238 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3239 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3240 } 3241 } 3242 ierr = MatDestroy(&B);CHKERRQ(ierr); 3243 /* coarse basis functions */ 3244 for (i=0;i<n_constraints;i++) { 3245 PetscScalar *y; 3246 3247 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3248 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3249 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3250 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3251 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3252 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3253 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3254 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3255 PetscInt j; 3256 3257 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3258 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3259 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3260 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3261 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3262 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3263 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3264 } 3265 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3266 } 3267 } 3268 if (n_constraints) { 3269 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3270 } 3271 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3272 3273 /* coarse matrix entries relative to B_0 */ 3274 if (pcbddc->benign_n) { 3275 Mat B0_B,B0_BPHI; 3276 IS is_dummy; 3277 PetscScalar *data; 3278 PetscInt j; 3279 3280 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3281 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3282 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3283 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3284 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3285 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3286 for (j=0;j<pcbddc->benign_n;j++) { 3287 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3288 for (i=0;i<pcbddc->local_primal_size;i++) { 3289 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3290 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3291 } 3292 } 3293 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3294 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3295 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3296 } 3297 3298 /* compute other basis functions for non-symmetric problems */ 3299 if (!pcbddc->symmetric_primal) { 3300 Mat B_V=NULL,B_C=NULL; 3301 PetscScalar *marray; 3302 3303 if (n_constraints) { 3304 Mat S_CCT,C_CRT; 3305 3306 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3307 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3308 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3309 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3310 if (n_vertices) { 3311 Mat S_VCT; 3312 3313 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3314 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3315 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3316 } 3317 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3318 } else { 3319 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3320 } 3321 if (n_vertices && n_R) { 3322 PetscScalar *av,*marray; 3323 const PetscInt *xadj,*adjncy; 3324 PetscInt n; 3325 PetscBool flg_row; 3326 3327 /* B_V = B_V - A_VR^T */ 3328 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3329 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3330 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3331 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3332 for (i=0;i<n;i++) { 3333 PetscInt j; 3334 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3335 } 3336 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3337 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3338 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3339 } 3340 3341 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3342 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3343 for (i=0;i<n_vertices;i++) { 3344 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3345 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3346 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3347 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3348 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3349 } 3350 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3351 if (B_C) { 3352 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3353 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3354 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3355 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3356 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3357 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3358 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3359 } 3360 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3361 } 3362 /* coarse basis functions */ 3363 for (i=0;i<pcbddc->local_primal_size;i++) { 3364 PetscScalar *y; 3365 3366 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3367 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3368 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3369 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3370 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3371 if (i<n_vertices) { 3372 y[n_B*i+idx_V_B[i]] = 1.0; 3373 } 3374 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3375 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3376 3377 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3378 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3379 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3380 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3381 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3382 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3383 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3384 } 3385 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3386 } 3387 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3388 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3389 } 3390 /* free memory */ 3391 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3392 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3393 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3394 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3395 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3396 ierr = PetscFree(work);CHKERRQ(ierr); 3397 if (n_vertices) { 3398 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3399 } 3400 if (n_constraints) { 3401 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3402 } 3403 /* Checking coarse_sub_mat and coarse basis functios */ 3404 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3405 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3406 if (pcbddc->dbg_flag) { 3407 Mat coarse_sub_mat; 3408 Mat AUXMAT,TM1,TM2,TM3,TM4; 3409 Mat coarse_phi_D,coarse_phi_B; 3410 Mat coarse_psi_D,coarse_psi_B; 3411 Mat A_II,A_BB,A_IB,A_BI; 3412 Mat C_B,CPHI; 3413 IS is_dummy; 3414 Vec mones; 3415 MatType checkmattype=MATSEQAIJ; 3416 PetscReal real_value; 3417 3418 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3419 Mat A; 3420 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3421 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3422 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3423 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3424 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3425 ierr = MatDestroy(&A);CHKERRQ(ierr); 3426 } else { 3427 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3428 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3429 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3430 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3431 } 3432 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3433 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3434 if (!pcbddc->symmetric_primal) { 3435 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3436 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3437 } 3438 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3439 3440 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3441 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3442 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3443 if (!pcbddc->symmetric_primal) { 3444 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3445 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3446 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3447 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3448 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3449 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3450 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3451 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3452 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3453 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3454 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3455 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3456 } else { 3457 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3458 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3459 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3460 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3461 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3462 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3463 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3464 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3465 } 3466 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3467 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3468 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3469 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3470 if (pcbddc->benign_n) { 3471 Mat B0_B,B0_BPHI; 3472 PetscScalar *data,*data2; 3473 PetscInt j; 3474 3475 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3476 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3477 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3478 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3479 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3480 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3481 for (j=0;j<pcbddc->benign_n;j++) { 3482 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3483 for (i=0;i<pcbddc->local_primal_size;i++) { 3484 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3485 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3486 } 3487 } 3488 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3489 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3490 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3491 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3492 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3493 } 3494 #if 0 3495 { 3496 PetscViewer viewer; 3497 char filename[256]; 3498 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3499 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3500 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3501 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3502 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3503 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3504 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3505 if (save_change) { 3506 Mat phi_B; 3507 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3508 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3509 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3510 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3511 } else { 3512 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3513 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3514 } 3515 if (pcbddc->coarse_phi_D) { 3516 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3517 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3518 } 3519 if (pcbddc->coarse_psi_B) { 3520 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3521 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3522 } 3523 if (pcbddc->coarse_psi_D) { 3524 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3525 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3526 } 3527 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3528 } 3529 #endif 3530 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3531 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3532 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3533 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3534 3535 /* check constraints */ 3536 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3537 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3538 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3539 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3540 } else { 3541 PetscScalar *data; 3542 Mat tmat; 3543 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3544 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3545 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3546 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3547 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3548 } 3549 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3550 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3551 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3552 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3553 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3554 if (!pcbddc->symmetric_primal) { 3555 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3556 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3557 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3558 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3560 } 3561 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3562 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3563 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3564 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3565 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3566 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3567 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3568 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3569 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3570 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3571 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3572 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3573 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3574 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3575 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3576 if (!pcbddc->symmetric_primal) { 3577 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3578 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3579 } 3580 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3581 } 3582 /* get back data */ 3583 *coarse_submat_vals_n = coarse_submat_vals; 3584 PetscFunctionReturn(0); 3585 } 3586 3587 #undef __FUNCT__ 3588 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3589 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3590 { 3591 Mat *work_mat; 3592 IS isrow_s,iscol_s; 3593 PetscBool rsorted,csorted; 3594 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3595 PetscErrorCode ierr; 3596 3597 PetscFunctionBegin; 3598 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3599 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3600 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3601 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3602 3603 if (!rsorted) { 3604 const PetscInt *idxs; 3605 PetscInt *idxs_sorted,i; 3606 3607 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3608 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3609 for (i=0;i<rsize;i++) { 3610 idxs_perm_r[i] = i; 3611 } 3612 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3613 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3614 for (i=0;i<rsize;i++) { 3615 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3616 } 3617 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3618 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3619 } else { 3620 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3621 isrow_s = isrow; 3622 } 3623 3624 if (!csorted) { 3625 if (isrow == iscol) { 3626 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3627 iscol_s = isrow_s; 3628 } else { 3629 const PetscInt *idxs; 3630 PetscInt *idxs_sorted,i; 3631 3632 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3633 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3634 for (i=0;i<csize;i++) { 3635 idxs_perm_c[i] = i; 3636 } 3637 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3638 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3639 for (i=0;i<csize;i++) { 3640 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3641 } 3642 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3643 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3644 } 3645 } else { 3646 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3647 iscol_s = iscol; 3648 } 3649 3650 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3651 3652 if (!rsorted || !csorted) { 3653 Mat new_mat; 3654 IS is_perm_r,is_perm_c; 3655 3656 if (!rsorted) { 3657 PetscInt *idxs_r,i; 3658 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3659 for (i=0;i<rsize;i++) { 3660 idxs_r[idxs_perm_r[i]] = i; 3661 } 3662 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3663 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3664 } else { 3665 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3666 } 3667 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3668 3669 if (!csorted) { 3670 if (isrow_s == iscol_s) { 3671 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3672 is_perm_c = is_perm_r; 3673 } else { 3674 PetscInt *idxs_c,i; 3675 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3676 for (i=0;i<csize;i++) { 3677 idxs_c[idxs_perm_c[i]] = i; 3678 } 3679 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3680 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3681 } 3682 } else { 3683 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3684 } 3685 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3686 3687 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3688 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3689 work_mat[0] = new_mat; 3690 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3691 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3692 } 3693 3694 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3695 *B = work_mat[0]; 3696 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3697 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3698 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3699 PetscFunctionReturn(0); 3700 } 3701 3702 #undef __FUNCT__ 3703 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3704 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3705 { 3706 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3707 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3708 Mat new_mat; 3709 IS is_local,is_global; 3710 PetscInt local_size; 3711 PetscBool isseqaij; 3712 PetscErrorCode ierr; 3713 3714 PetscFunctionBegin; 3715 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3716 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3717 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3718 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3719 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3720 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3721 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3722 3723 /* check */ 3724 if (pcbddc->dbg_flag) { 3725 Vec x,x_change; 3726 PetscReal error; 3727 3728 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3729 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3730 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3731 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3732 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3733 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3734 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3735 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3736 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3737 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3738 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3739 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3740 ierr = VecDestroy(&x);CHKERRQ(ierr); 3741 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3742 } 3743 3744 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3745 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3746 if (isseqaij) { 3747 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3748 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3749 } else { 3750 Mat work_mat; 3751 3752 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3753 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3754 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3755 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3756 } 3757 if (matis->A->symmetric_set) { 3758 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3759 #if !defined(PETSC_USE_COMPLEX) 3760 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3761 #endif 3762 } 3763 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3764 PetscFunctionReturn(0); 3765 } 3766 3767 #undef __FUNCT__ 3768 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3769 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3770 { 3771 PC_IS* pcis = (PC_IS*)(pc->data); 3772 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3773 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3774 PetscInt *idx_R_local=NULL; 3775 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3776 PetscInt vbs,bs; 3777 PetscBT bitmask=NULL; 3778 PetscErrorCode ierr; 3779 3780 PetscFunctionBegin; 3781 /* 3782 No need to setup local scatters if 3783 - primal space is unchanged 3784 AND 3785 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 3786 AND 3787 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 3788 */ 3789 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 3790 PetscFunctionReturn(0); 3791 } 3792 /* destroy old objects */ 3793 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3794 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3795 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3796 /* Set Non-overlapping dimensions */ 3797 n_B = pcis->n_B; 3798 n_D = pcis->n - n_B; 3799 n_vertices = pcbddc->n_vertices; 3800 3801 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 3802 3803 /* create auxiliary bitmask and allocate workspace */ 3804 if (!sub_schurs || !sub_schurs->reuse_solver) { 3805 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 3806 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 3807 for (i=0;i<n_vertices;i++) { 3808 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 3809 } 3810 3811 for (i=0, n_R=0; i<pcis->n; i++) { 3812 if (!PetscBTLookup(bitmask,i)) { 3813 idx_R_local[n_R++] = i; 3814 } 3815 } 3816 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 3817 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3818 3819 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3820 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 3821 } 3822 3823 /* Block code */ 3824 vbs = 1; 3825 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 3826 if (bs>1 && !(n_vertices%bs)) { 3827 PetscBool is_blocked = PETSC_TRUE; 3828 PetscInt *vary; 3829 if (!sub_schurs || !sub_schurs->reuse_solver) { 3830 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 3831 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 3832 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 3833 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 3834 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 3835 for (i=0; i<pcis->n/bs; i++) { 3836 if (vary[i]!=0 && vary[i]!=bs) { 3837 is_blocked = PETSC_FALSE; 3838 break; 3839 } 3840 } 3841 ierr = PetscFree(vary);CHKERRQ(ierr); 3842 } else { 3843 /* Verify directly the R set */ 3844 for (i=0; i<n_R/bs; i++) { 3845 PetscInt j,node=idx_R_local[bs*i]; 3846 for (j=1; j<bs; j++) { 3847 if (node != idx_R_local[bs*i+j]-j) { 3848 is_blocked = PETSC_FALSE; 3849 break; 3850 } 3851 } 3852 } 3853 } 3854 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 3855 vbs = bs; 3856 for (i=0;i<n_R/vbs;i++) { 3857 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 3858 } 3859 } 3860 } 3861 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 3862 if (sub_schurs && sub_schurs->reuse_solver) { 3863 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3864 3865 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3866 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 3867 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 3868 reuse_solver->is_R = pcbddc->is_R_local; 3869 } else { 3870 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3871 } 3872 3873 /* print some info if requested */ 3874 if (pcbddc->dbg_flag) { 3875 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3876 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3877 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3880 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3882 } 3883 3884 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3885 if (!sub_schurs || !sub_schurs->reuse_solver) { 3886 IS is_aux1,is_aux2; 3887 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3888 3889 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3890 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3891 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3892 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3893 for (i=0; i<n_D; i++) { 3894 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3895 } 3896 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3897 for (i=0, j=0; i<n_R; i++) { 3898 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3899 aux_array1[j++] = i; 3900 } 3901 } 3902 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3903 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3904 for (i=0, j=0; i<n_B; i++) { 3905 if (!PetscBTLookup(bitmask,is_indices[i])) { 3906 aux_array2[j++] = i; 3907 } 3908 } 3909 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3910 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3911 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3912 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3913 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3914 3915 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3916 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3917 for (i=0, j=0; i<n_R; i++) { 3918 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3919 aux_array1[j++] = i; 3920 } 3921 } 3922 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3923 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3924 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3925 } 3926 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3927 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3928 } else { 3929 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3930 IS tis; 3931 PetscInt schur_size; 3932 3933 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3934 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3935 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3936 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3937 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3938 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3939 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3940 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3941 } 3942 } 3943 PetscFunctionReturn(0); 3944 } 3945 3946 3947 #undef __FUNCT__ 3948 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3949 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3950 { 3951 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3952 PC_IS *pcis = (PC_IS*)pc->data; 3953 PC pc_temp; 3954 Mat A_RR; 3955 MatReuse reuse; 3956 PetscScalar m_one = -1.0; 3957 PetscReal value; 3958 PetscInt n_D,n_R; 3959 PetscBool check_corr[2],issbaij; 3960 PetscErrorCode ierr; 3961 /* prefixes stuff */ 3962 char dir_prefix[256],neu_prefix[256],str_level[16]; 3963 size_t len; 3964 3965 PetscFunctionBegin; 3966 3967 /* compute prefixes */ 3968 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3969 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3970 if (!pcbddc->current_level) { 3971 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3972 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3973 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3974 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3975 } else { 3976 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3977 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3978 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3979 len -= 15; /* remove "pc_bddc_coarse_" */ 3980 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3981 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3982 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3983 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3984 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3985 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3986 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3987 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3988 } 3989 3990 /* DIRICHLET PROBLEM */ 3991 if (dirichlet) { 3992 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3993 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3994 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3995 if (pcbddc->dbg_flag) { 3996 Mat A_IIn; 3997 3998 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3999 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4000 pcis->A_II = A_IIn; 4001 } 4002 } 4003 if (pcbddc->local_mat->symmetric_set) { 4004 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4005 } 4006 /* Matrix for Dirichlet problem is pcis->A_II */ 4007 n_D = pcis->n - pcis->n_B; 4008 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4009 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4010 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4011 /* default */ 4012 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4013 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4014 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4015 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4016 if (issbaij) { 4017 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4018 } else { 4019 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4020 } 4021 /* Allow user's customization */ 4022 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4023 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4024 } 4025 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4026 if (sub_schurs && sub_schurs->reuse_solver) { 4027 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4028 4029 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4030 } 4031 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4032 if (!n_D) { 4033 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4034 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4035 } 4036 /* Set Up KSP for Dirichlet problem of BDDC */ 4037 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4038 /* set ksp_D into pcis data */ 4039 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4040 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4041 pcis->ksp_D = pcbddc->ksp_D; 4042 } 4043 4044 /* NEUMANN PROBLEM */ 4045 A_RR = 0; 4046 if (neumann) { 4047 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4048 PetscInt ibs,mbs; 4049 PetscBool issbaij; 4050 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4051 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4052 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4053 if (pcbddc->ksp_R) { /* already created ksp */ 4054 PetscInt nn_R; 4055 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4056 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4057 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4058 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4059 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4060 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4061 reuse = MAT_INITIAL_MATRIX; 4062 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4063 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4064 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4065 reuse = MAT_INITIAL_MATRIX; 4066 } else { /* safe to reuse the matrix */ 4067 reuse = MAT_REUSE_MATRIX; 4068 } 4069 } 4070 /* last check */ 4071 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4072 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4073 reuse = MAT_INITIAL_MATRIX; 4074 } 4075 } else { /* first time, so we need to create the matrix */ 4076 reuse = MAT_INITIAL_MATRIX; 4077 } 4078 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4079 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4080 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4081 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4082 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4083 if (matis->A == pcbddc->local_mat) { 4084 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4085 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4086 } else { 4087 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4088 } 4089 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4090 if (matis->A == pcbddc->local_mat) { 4091 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4092 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4093 } else { 4094 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4095 } 4096 } 4097 /* extract A_RR */ 4098 if (sub_schurs && sub_schurs->reuse_solver) { 4099 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4100 4101 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4102 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4103 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4104 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4105 } else { 4106 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4107 } 4108 } else { 4109 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4110 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4111 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4112 } 4113 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4114 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4115 } 4116 if (pcbddc->local_mat->symmetric_set) { 4117 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4118 } 4119 if (!pcbddc->ksp_R) { /* create object if not present */ 4120 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4121 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4122 /* default */ 4123 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4124 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4125 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4126 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4127 if (issbaij) { 4128 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4129 } else { 4130 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4131 } 4132 /* Allow user's customization */ 4133 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4134 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4135 } 4136 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4137 if (!n_R) { 4138 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4139 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4140 } 4141 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4142 /* Reuse solver if it is present */ 4143 if (sub_schurs && sub_schurs->reuse_solver) { 4144 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4145 4146 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4147 } 4148 /* Set Up KSP for Neumann problem of BDDC */ 4149 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4150 } 4151 4152 if (pcbddc->dbg_flag) { 4153 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4154 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4156 } 4157 4158 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4159 check_corr[0] = check_corr[1] = PETSC_FALSE; 4160 if (pcbddc->NullSpace_corr[0]) { 4161 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4162 } 4163 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4164 check_corr[0] = PETSC_TRUE; 4165 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4166 } 4167 if (neumann && pcbddc->NullSpace_corr[2]) { 4168 check_corr[1] = PETSC_TRUE; 4169 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4170 } 4171 4172 /* check Dirichlet and Neumann solvers */ 4173 if (pcbddc->dbg_flag) { 4174 if (dirichlet) { /* Dirichlet */ 4175 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4176 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4177 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4178 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4179 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4181 if (check_corr[0]) { 4182 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4183 } 4184 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4185 } 4186 if (neumann) { /* Neumann */ 4187 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4188 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4189 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4190 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4191 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4192 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4193 if (check_corr[1]) { 4194 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4195 } 4196 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4197 } 4198 } 4199 /* free Neumann problem's matrix */ 4200 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4201 PetscFunctionReturn(0); 4202 } 4203 4204 #undef __FUNCT__ 4205 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4206 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4207 { 4208 PetscErrorCode ierr; 4209 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4210 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4211 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4212 4213 PetscFunctionBegin; 4214 if (!reuse_solver) { 4215 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4216 } 4217 if (!pcbddc->switch_static) { 4218 if (applytranspose && pcbddc->local_auxmat1) { 4219 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4220 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4221 } 4222 if (!reuse_solver) { 4223 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4224 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4225 } else { 4226 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4227 4228 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4229 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4230 } 4231 } else { 4232 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4233 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4234 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4235 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4236 if (applytranspose && pcbddc->local_auxmat1) { 4237 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4238 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4239 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4240 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4241 } 4242 } 4243 if (!reuse_solver || pcbddc->switch_static) { 4244 if (applytranspose) { 4245 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4246 } else { 4247 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4248 } 4249 } else { 4250 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4251 4252 if (applytranspose) { 4253 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4254 } else { 4255 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4256 } 4257 } 4258 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4259 if (!pcbddc->switch_static) { 4260 if (!reuse_solver) { 4261 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4262 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4263 } else { 4264 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4265 4266 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4267 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4268 } 4269 if (!applytranspose && pcbddc->local_auxmat1) { 4270 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4271 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4272 } 4273 } else { 4274 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4275 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4276 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4277 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4278 if (!applytranspose && pcbddc->local_auxmat1) { 4279 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4280 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4281 } 4282 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4283 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4284 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4285 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4286 } 4287 PetscFunctionReturn(0); 4288 } 4289 4290 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4291 #undef __FUNCT__ 4292 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4293 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4294 { 4295 PetscErrorCode ierr; 4296 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4297 PC_IS* pcis = (PC_IS*) (pc->data); 4298 const PetscScalar zero = 0.0; 4299 4300 PetscFunctionBegin; 4301 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4302 if (!pcbddc->benign_apply_coarse_only) { 4303 if (applytranspose) { 4304 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4305 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4306 } else { 4307 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4308 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4309 } 4310 } else { 4311 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4312 } 4313 4314 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4315 if (pcbddc->benign_n) { 4316 PetscScalar *array; 4317 PetscInt j; 4318 4319 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4320 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4321 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4322 } 4323 4324 /* start communications from local primal nodes to rhs of coarse solver */ 4325 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4326 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4327 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4328 4329 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4330 if (pcbddc->coarse_ksp) { 4331 Mat coarse_mat; 4332 Vec rhs,sol; 4333 MatNullSpace nullsp; 4334 PetscBool isbddc = PETSC_FALSE; 4335 4336 if (pcbddc->benign_have_null) { 4337 PC coarse_pc; 4338 4339 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4340 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4341 /* we need to propagate to coarser levels the need for a possible benign correction */ 4342 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4343 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4344 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4345 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4346 } 4347 } 4348 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4349 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4350 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4351 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4352 if (nullsp) { 4353 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4354 } 4355 if (applytranspose) { 4356 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4357 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4358 } else { 4359 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4360 PC coarse_pc; 4361 4362 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4363 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4364 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4365 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4366 } else { 4367 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4368 } 4369 } 4370 /* we don't need the benign correction at coarser levels anymore */ 4371 if (pcbddc->benign_have_null && isbddc) { 4372 PC coarse_pc; 4373 PC_BDDC* coarsepcbddc; 4374 4375 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4376 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4377 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4378 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4379 } 4380 if (nullsp) { 4381 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4382 } 4383 } 4384 4385 /* Local solution on R nodes */ 4386 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4387 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4388 } 4389 /* communications from coarse sol to local primal nodes */ 4390 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4391 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4392 4393 /* Sum contributions from the two levels */ 4394 if (!pcbddc->benign_apply_coarse_only) { 4395 if (applytranspose) { 4396 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4397 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4398 } else { 4399 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4400 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4401 } 4402 /* store p0 */ 4403 if (pcbddc->benign_n) { 4404 PetscScalar *array; 4405 PetscInt j; 4406 4407 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4408 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4409 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4410 } 4411 } else { /* expand the coarse solution */ 4412 if (applytranspose) { 4413 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4414 } else { 4415 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4416 } 4417 } 4418 PetscFunctionReturn(0); 4419 } 4420 4421 #undef __FUNCT__ 4422 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4423 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4424 { 4425 PetscErrorCode ierr; 4426 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4427 PetscScalar *array; 4428 Vec from,to; 4429 4430 PetscFunctionBegin; 4431 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4432 from = pcbddc->coarse_vec; 4433 to = pcbddc->vec1_P; 4434 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4435 Vec tvec; 4436 4437 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4438 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4439 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4440 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4441 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4442 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4443 } 4444 } else { /* from local to global -> put data in coarse right hand side */ 4445 from = pcbddc->vec1_P; 4446 to = pcbddc->coarse_vec; 4447 } 4448 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4449 PetscFunctionReturn(0); 4450 } 4451 4452 #undef __FUNCT__ 4453 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4454 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4455 { 4456 PetscErrorCode ierr; 4457 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4458 PetscScalar *array; 4459 Vec from,to; 4460 4461 PetscFunctionBegin; 4462 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4463 from = pcbddc->coarse_vec; 4464 to = pcbddc->vec1_P; 4465 } else { /* from local to global -> put data in coarse right hand side */ 4466 from = pcbddc->vec1_P; 4467 to = pcbddc->coarse_vec; 4468 } 4469 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4470 if (smode == SCATTER_FORWARD) { 4471 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4472 Vec tvec; 4473 4474 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4475 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4476 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4477 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4478 } 4479 } else { 4480 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4481 ierr = VecResetArray(from);CHKERRQ(ierr); 4482 } 4483 } 4484 PetscFunctionReturn(0); 4485 } 4486 4487 /* uncomment for testing purposes */ 4488 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4489 #undef __FUNCT__ 4490 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4491 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4492 { 4493 PetscErrorCode ierr; 4494 PC_IS* pcis = (PC_IS*)(pc->data); 4495 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4496 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4497 /* one and zero */ 4498 PetscScalar one=1.0,zero=0.0; 4499 /* space to store constraints and their local indices */ 4500 PetscScalar *constraints_data; 4501 PetscInt *constraints_idxs,*constraints_idxs_B; 4502 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4503 PetscInt *constraints_n; 4504 /* iterators */ 4505 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4506 /* BLAS integers */ 4507 PetscBLASInt lwork,lierr; 4508 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4509 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4510 /* reuse */ 4511 PetscInt olocal_primal_size,olocal_primal_size_cc; 4512 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4513 /* change of basis */ 4514 PetscBool qr_needed; 4515 PetscBT change_basis,qr_needed_idx; 4516 /* auxiliary stuff */ 4517 PetscInt *nnz,*is_indices; 4518 PetscInt ncc; 4519 /* some quantities */ 4520 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4521 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4522 4523 PetscFunctionBegin; 4524 /* Destroy Mat objects computed previously */ 4525 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4526 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4527 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4528 /* save info on constraints from previous setup (if any) */ 4529 olocal_primal_size = pcbddc->local_primal_size; 4530 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4531 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4532 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4533 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4534 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4535 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4536 4537 if (!pcbddc->adaptive_selection) { 4538 IS ISForVertices,*ISForFaces,*ISForEdges; 4539 MatNullSpace nearnullsp; 4540 const Vec *nearnullvecs; 4541 Vec *localnearnullsp; 4542 PetscScalar *array; 4543 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4544 PetscBool nnsp_has_cnst; 4545 /* LAPACK working arrays for SVD or POD */ 4546 PetscBool skip_lapack,boolforchange; 4547 PetscScalar *work; 4548 PetscReal *singular_vals; 4549 #if defined(PETSC_USE_COMPLEX) 4550 PetscReal *rwork; 4551 #endif 4552 #if defined(PETSC_MISSING_LAPACK_GESVD) 4553 PetscScalar *temp_basis,*correlation_mat; 4554 #else 4555 PetscBLASInt dummy_int=1; 4556 PetscScalar dummy_scalar=1.; 4557 #endif 4558 4559 /* Get index sets for faces, edges and vertices from graph */ 4560 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4561 /* print some info */ 4562 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4563 PetscInt nv; 4564 4565 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4566 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4567 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4569 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4572 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4573 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4574 } 4575 4576 /* free unneeded index sets */ 4577 if (!pcbddc->use_vertices) { 4578 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4579 } 4580 if (!pcbddc->use_edges) { 4581 for (i=0;i<n_ISForEdges;i++) { 4582 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4583 } 4584 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4585 n_ISForEdges = 0; 4586 } 4587 if (!pcbddc->use_faces) { 4588 for (i=0;i<n_ISForFaces;i++) { 4589 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4590 } 4591 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4592 n_ISForFaces = 0; 4593 } 4594 4595 /* check if near null space is attached to global mat */ 4596 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4597 if (nearnullsp) { 4598 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4599 /* remove any stored info */ 4600 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4601 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4602 /* store information for BDDC solver reuse */ 4603 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4604 pcbddc->onearnullspace = nearnullsp; 4605 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4606 for (i=0;i<nnsp_size;i++) { 4607 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4608 } 4609 } else { /* if near null space is not provided BDDC uses constants by default */ 4610 nnsp_size = 0; 4611 nnsp_has_cnst = PETSC_TRUE; 4612 } 4613 /* get max number of constraints on a single cc */ 4614 max_constraints = nnsp_size; 4615 if (nnsp_has_cnst) max_constraints++; 4616 4617 /* 4618 Evaluate maximum storage size needed by the procedure 4619 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4620 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4621 There can be multiple constraints per connected component 4622 */ 4623 n_vertices = 0; 4624 if (ISForVertices) { 4625 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4626 } 4627 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4628 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4629 4630 total_counts = n_ISForFaces+n_ISForEdges; 4631 total_counts *= max_constraints; 4632 total_counts += n_vertices; 4633 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4634 4635 total_counts = 0; 4636 max_size_of_constraint = 0; 4637 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4638 IS used_is; 4639 if (i<n_ISForEdges) { 4640 used_is = ISForEdges[i]; 4641 } else { 4642 used_is = ISForFaces[i-n_ISForEdges]; 4643 } 4644 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4645 total_counts += j; 4646 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4647 } 4648 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4649 4650 /* get local part of global near null space vectors */ 4651 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4652 for (k=0;k<nnsp_size;k++) { 4653 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4654 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4655 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4656 } 4657 4658 /* whether or not to skip lapack calls */ 4659 skip_lapack = PETSC_TRUE; 4660 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4661 4662 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4663 if (!skip_lapack) { 4664 PetscScalar temp_work; 4665 4666 #if defined(PETSC_MISSING_LAPACK_GESVD) 4667 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4668 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4669 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4670 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4671 #if defined(PETSC_USE_COMPLEX) 4672 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4673 #endif 4674 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4675 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4676 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4677 lwork = -1; 4678 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4679 #if !defined(PETSC_USE_COMPLEX) 4680 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4681 #else 4682 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4683 #endif 4684 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4685 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4686 #else /* on missing GESVD */ 4687 /* SVD */ 4688 PetscInt max_n,min_n; 4689 max_n = max_size_of_constraint; 4690 min_n = max_constraints; 4691 if (max_size_of_constraint < max_constraints) { 4692 min_n = max_size_of_constraint; 4693 max_n = max_constraints; 4694 } 4695 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4696 #if defined(PETSC_USE_COMPLEX) 4697 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4698 #endif 4699 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4700 lwork = -1; 4701 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4702 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4703 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4704 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4705 #if !defined(PETSC_USE_COMPLEX) 4706 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4707 #else 4708 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4709 #endif 4710 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4711 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4712 #endif /* on missing GESVD */ 4713 /* Allocate optimal workspace */ 4714 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4715 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4716 } 4717 /* Now we can loop on constraining sets */ 4718 total_counts = 0; 4719 constraints_idxs_ptr[0] = 0; 4720 constraints_data_ptr[0] = 0; 4721 /* vertices */ 4722 if (n_vertices) { 4723 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4724 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4725 for (i=0;i<n_vertices;i++) { 4726 constraints_n[total_counts] = 1; 4727 constraints_data[total_counts] = 1.0; 4728 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4729 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4730 total_counts++; 4731 } 4732 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4733 n_vertices = total_counts; 4734 } 4735 4736 /* edges and faces */ 4737 total_counts_cc = total_counts; 4738 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4739 IS used_is; 4740 PetscBool idxs_copied = PETSC_FALSE; 4741 4742 if (ncc<n_ISForEdges) { 4743 used_is = ISForEdges[ncc]; 4744 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4745 } else { 4746 used_is = ISForFaces[ncc-n_ISForEdges]; 4747 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4748 } 4749 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4750 4751 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4752 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4753 /* change of basis should not be performed on local periodic nodes */ 4754 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4755 if (nnsp_has_cnst) { 4756 PetscScalar quad_value; 4757 4758 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4759 idxs_copied = PETSC_TRUE; 4760 4761 if (!pcbddc->use_nnsp_true) { 4762 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4763 } else { 4764 quad_value = 1.0; 4765 } 4766 for (j=0;j<size_of_constraint;j++) { 4767 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4768 } 4769 temp_constraints++; 4770 total_counts++; 4771 } 4772 for (k=0;k<nnsp_size;k++) { 4773 PetscReal real_value; 4774 PetscScalar *ptr_to_data; 4775 4776 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4777 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4778 for (j=0;j<size_of_constraint;j++) { 4779 ptr_to_data[j] = array[is_indices[j]]; 4780 } 4781 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4782 /* check if array is null on the connected component */ 4783 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4784 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 4785 if (real_value > 0.0) { /* keep indices and values */ 4786 temp_constraints++; 4787 total_counts++; 4788 if (!idxs_copied) { 4789 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4790 idxs_copied = PETSC_TRUE; 4791 } 4792 } 4793 } 4794 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4795 valid_constraints = temp_constraints; 4796 if (!pcbddc->use_nnsp_true && temp_constraints) { 4797 if (temp_constraints == 1) { /* just normalize the constraint */ 4798 PetscScalar norm,*ptr_to_data; 4799 4800 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4801 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4802 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 4803 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 4804 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 4805 } else { /* perform SVD */ 4806 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 4807 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4808 4809 #if defined(PETSC_MISSING_LAPACK_GESVD) 4810 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 4811 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 4812 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 4813 the constraints basis will differ (by a complex factor with absolute value equal to 1) 4814 from that computed using LAPACKgesvd 4815 -> This is due to a different computation of eigenvectors in LAPACKheev 4816 -> The quality of the POD-computed basis will be the same */ 4817 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 4818 /* Store upper triangular part of correlation matrix */ 4819 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4820 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4821 for (j=0;j<temp_constraints;j++) { 4822 for (k=0;k<j+1;k++) { 4823 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 4824 } 4825 } 4826 /* compute eigenvalues and eigenvectors of correlation matrix */ 4827 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4828 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 4829 #if !defined(PETSC_USE_COMPLEX) 4830 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 4831 #else 4832 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 4833 #endif 4834 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4835 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 4836 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 4837 j = 0; 4838 while (j < temp_constraints && singular_vals[j] < tol) j++; 4839 total_counts = total_counts-j; 4840 valid_constraints = temp_constraints-j; 4841 /* scale and copy POD basis into used quadrature memory */ 4842 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4843 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4844 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 4845 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4846 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 4847 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4848 if (j<temp_constraints) { 4849 PetscInt ii; 4850 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 4851 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4852 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 4853 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4854 for (k=0;k<temp_constraints-j;k++) { 4855 for (ii=0;ii<size_of_constraint;ii++) { 4856 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 4857 } 4858 } 4859 } 4860 #else /* on missing GESVD */ 4861 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4862 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4863 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4864 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4865 #if !defined(PETSC_USE_COMPLEX) 4866 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4867 #else 4868 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4869 #endif 4870 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4871 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4872 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4873 k = temp_constraints; 4874 if (k > size_of_constraint) k = size_of_constraint; 4875 j = 0; 4876 while (j < k && singular_vals[k-j-1] < tol) j++; 4877 valid_constraints = k-j; 4878 total_counts = total_counts-temp_constraints+valid_constraints; 4879 #endif /* on missing GESVD */ 4880 } 4881 } 4882 /* update pointers information */ 4883 if (valid_constraints) { 4884 constraints_n[total_counts_cc] = valid_constraints; 4885 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4886 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4887 /* set change_of_basis flag */ 4888 if (boolforchange) { 4889 PetscBTSet(change_basis,total_counts_cc); 4890 } 4891 total_counts_cc++; 4892 } 4893 } 4894 /* free workspace */ 4895 if (!skip_lapack) { 4896 ierr = PetscFree(work);CHKERRQ(ierr); 4897 #if defined(PETSC_USE_COMPLEX) 4898 ierr = PetscFree(rwork);CHKERRQ(ierr); 4899 #endif 4900 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4901 #if defined(PETSC_MISSING_LAPACK_GESVD) 4902 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4903 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4904 #endif 4905 } 4906 for (k=0;k<nnsp_size;k++) { 4907 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4908 } 4909 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4910 /* free index sets of faces, edges and vertices */ 4911 for (i=0;i<n_ISForFaces;i++) { 4912 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4913 } 4914 if (n_ISForFaces) { 4915 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4916 } 4917 for (i=0;i<n_ISForEdges;i++) { 4918 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4919 } 4920 if (n_ISForEdges) { 4921 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4922 } 4923 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4924 } else { 4925 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4926 4927 total_counts = 0; 4928 n_vertices = 0; 4929 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4930 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4931 } 4932 max_constraints = 0; 4933 total_counts_cc = 0; 4934 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4935 total_counts += pcbddc->adaptive_constraints_n[i]; 4936 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4937 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4938 } 4939 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4940 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4941 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4942 constraints_data = pcbddc->adaptive_constraints_data; 4943 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4944 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4945 total_counts_cc = 0; 4946 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4947 if (pcbddc->adaptive_constraints_n[i]) { 4948 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4949 } 4950 } 4951 #if 0 4952 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4953 for (i=0;i<total_counts_cc;i++) { 4954 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4955 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4956 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4957 printf(" %d",constraints_idxs[j]); 4958 } 4959 printf("\n"); 4960 printf("number of cc: %d\n",constraints_n[i]); 4961 } 4962 for (i=0;i<n_vertices;i++) { 4963 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4964 } 4965 for (i=0;i<sub_schurs->n_subs;i++) { 4966 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4967 } 4968 #endif 4969 4970 max_size_of_constraint = 0; 4971 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4972 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4973 /* Change of basis */ 4974 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4975 if (pcbddc->use_change_of_basis) { 4976 for (i=0;i<sub_schurs->n_subs;i++) { 4977 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4978 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4979 } 4980 } 4981 } 4982 } 4983 pcbddc->local_primal_size = total_counts; 4984 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4985 4986 /* map constraints_idxs in boundary numbering */ 4987 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4988 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4989 4990 /* Create constraint matrix */ 4991 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4992 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4993 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4994 4995 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4996 /* determine if a QR strategy is needed for change of basis */ 4997 qr_needed = PETSC_FALSE; 4998 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4999 total_primal_vertices=0; 5000 pcbddc->local_primal_size_cc = 0; 5001 for (i=0;i<total_counts_cc;i++) { 5002 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5003 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5004 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5005 pcbddc->local_primal_size_cc += 1; 5006 } else if (PetscBTLookup(change_basis,i)) { 5007 for (k=0;k<constraints_n[i];k++) { 5008 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5009 } 5010 pcbddc->local_primal_size_cc += constraints_n[i]; 5011 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5012 PetscBTSet(qr_needed_idx,i); 5013 qr_needed = PETSC_TRUE; 5014 } 5015 } else { 5016 pcbddc->local_primal_size_cc += 1; 5017 } 5018 } 5019 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5020 pcbddc->n_vertices = total_primal_vertices; 5021 /* permute indices in order to have a sorted set of vertices */ 5022 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5023 5024 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5025 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5026 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5027 5028 /* nonzero structure of constraint matrix */ 5029 /* and get reference dof for local constraints */ 5030 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5031 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5032 5033 j = total_primal_vertices; 5034 total_counts = total_primal_vertices; 5035 cum = total_primal_vertices; 5036 for (i=n_vertices;i<total_counts_cc;i++) { 5037 if (!PetscBTLookup(change_basis,i)) { 5038 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5039 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5040 cum++; 5041 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5042 for (k=0;k<constraints_n[i];k++) { 5043 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5044 nnz[j+k] = size_of_constraint; 5045 } 5046 j += constraints_n[i]; 5047 } 5048 } 5049 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5050 ierr = PetscFree(nnz);CHKERRQ(ierr); 5051 5052 /* set values in constraint matrix */ 5053 for (i=0;i<total_primal_vertices;i++) { 5054 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5055 } 5056 total_counts = total_primal_vertices; 5057 for (i=n_vertices;i<total_counts_cc;i++) { 5058 if (!PetscBTLookup(change_basis,i)) { 5059 PetscInt *cols; 5060 5061 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5062 cols = constraints_idxs+constraints_idxs_ptr[i]; 5063 for (k=0;k<constraints_n[i];k++) { 5064 PetscInt row = total_counts+k; 5065 PetscScalar *vals; 5066 5067 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5068 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5069 } 5070 total_counts += constraints_n[i]; 5071 } 5072 } 5073 /* assembling */ 5074 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5075 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5076 5077 /* 5078 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5079 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5080 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5081 */ 5082 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5083 if (pcbddc->use_change_of_basis) { 5084 /* dual and primal dofs on a single cc */ 5085 PetscInt dual_dofs,primal_dofs; 5086 /* working stuff for GEQRF */ 5087 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5088 PetscBLASInt lqr_work; 5089 /* working stuff for UNGQR */ 5090 PetscScalar *gqr_work,lgqr_work_t; 5091 PetscBLASInt lgqr_work; 5092 /* working stuff for TRTRS */ 5093 PetscScalar *trs_rhs; 5094 PetscBLASInt Blas_NRHS; 5095 /* pointers for values insertion into change of basis matrix */ 5096 PetscInt *start_rows,*start_cols; 5097 PetscScalar *start_vals; 5098 /* working stuff for values insertion */ 5099 PetscBT is_primal; 5100 PetscInt *aux_primal_numbering_B; 5101 /* matrix sizes */ 5102 PetscInt global_size,local_size; 5103 /* temporary change of basis */ 5104 Mat localChangeOfBasisMatrix; 5105 /* extra space for debugging */ 5106 PetscScalar *dbg_work; 5107 5108 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5109 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5110 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5111 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5112 /* nonzeros for local mat */ 5113 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5114 if (!pcbddc->benign_change || pcbddc->fake_change) { 5115 for (i=0;i<pcis->n;i++) nnz[i]=1; 5116 } else { 5117 const PetscInt *ii; 5118 PetscInt n; 5119 PetscBool flg_row; 5120 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5121 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5122 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5123 } 5124 for (i=n_vertices;i<total_counts_cc;i++) { 5125 if (PetscBTLookup(change_basis,i)) { 5126 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5127 if (PetscBTLookup(qr_needed_idx,i)) { 5128 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5129 } else { 5130 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5131 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5132 } 5133 } 5134 } 5135 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5136 ierr = PetscFree(nnz);CHKERRQ(ierr); 5137 /* Set interior change in the matrix */ 5138 if (!pcbddc->benign_change || pcbddc->fake_change) { 5139 for (i=0;i<pcis->n;i++) { 5140 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5141 } 5142 } else { 5143 const PetscInt *ii,*jj; 5144 PetscScalar *aa; 5145 PetscInt n; 5146 PetscBool flg_row; 5147 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5148 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5149 for (i=0;i<n;i++) { 5150 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5151 } 5152 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5153 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5154 } 5155 5156 if (pcbddc->dbg_flag) { 5157 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5159 } 5160 5161 5162 /* Now we loop on the constraints which need a change of basis */ 5163 /* 5164 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5165 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5166 5167 Basic blocks of change of basis matrix T computed by 5168 5169 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5170 5171 | 1 0 ... 0 s_1/S | 5172 | 0 1 ... 0 s_2/S | 5173 | ... | 5174 | 0 ... 1 s_{n-1}/S | 5175 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5176 5177 with S = \sum_{i=1}^n s_i^2 5178 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5179 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5180 5181 - QR decomposition of constraints otherwise 5182 */ 5183 if (qr_needed) { 5184 /* space to store Q */ 5185 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5186 /* array to store scaling factors for reflectors */ 5187 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5188 /* first we issue queries for optimal work */ 5189 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5190 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5191 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5192 lqr_work = -1; 5193 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5194 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5195 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5196 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5197 lgqr_work = -1; 5198 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5199 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5200 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5201 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5202 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5203 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5204 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5205 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5206 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5207 /* array to store rhs and solution of triangular solver */ 5208 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5209 /* allocating workspace for check */ 5210 if (pcbddc->dbg_flag) { 5211 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5212 } 5213 } 5214 /* array to store whether a node is primal or not */ 5215 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5216 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5217 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5218 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5219 for (i=0;i<total_primal_vertices;i++) { 5220 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5221 } 5222 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5223 5224 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5225 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5226 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5227 if (PetscBTLookup(change_basis,total_counts)) { 5228 /* get constraint info */ 5229 primal_dofs = constraints_n[total_counts]; 5230 dual_dofs = size_of_constraint-primal_dofs; 5231 5232 if (pcbddc->dbg_flag) { 5233 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5234 } 5235 5236 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5237 5238 /* copy quadrature constraints for change of basis check */ 5239 if (pcbddc->dbg_flag) { 5240 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5241 } 5242 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5243 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5244 5245 /* compute QR decomposition of constraints */ 5246 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5247 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5248 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5249 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5250 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5251 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5252 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5253 5254 /* explictly compute R^-T */ 5255 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5256 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5257 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5258 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5259 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5260 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5261 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5262 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5263 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5264 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5265 5266 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5267 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5268 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5269 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5270 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5271 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5272 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5273 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5274 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5275 5276 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5277 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5278 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5279 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5280 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5281 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5282 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5283 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5285 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5286 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5288 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5289 5290 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5291 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5292 /* insert cols for primal dofs */ 5293 for (j=0;j<primal_dofs;j++) { 5294 start_vals = &qr_basis[j*size_of_constraint]; 5295 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5296 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5297 } 5298 /* insert cols for dual dofs */ 5299 for (j=0,k=0;j<dual_dofs;k++) { 5300 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5301 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5302 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5303 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5304 j++; 5305 } 5306 } 5307 5308 /* check change of basis */ 5309 if (pcbddc->dbg_flag) { 5310 PetscInt ii,jj; 5311 PetscBool valid_qr=PETSC_TRUE; 5312 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5313 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5314 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5315 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5316 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5317 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5318 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5319 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5320 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5321 for (jj=0;jj<size_of_constraint;jj++) { 5322 for (ii=0;ii<primal_dofs;ii++) { 5323 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5324 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5325 } 5326 } 5327 if (!valid_qr) { 5328 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5329 for (jj=0;jj<size_of_constraint;jj++) { 5330 for (ii=0;ii<primal_dofs;ii++) { 5331 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5332 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5333 } 5334 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5335 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5336 } 5337 } 5338 } 5339 } else { 5340 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5341 } 5342 } 5343 } else { /* simple transformation block */ 5344 PetscInt row,col; 5345 PetscScalar val,norm; 5346 5347 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5348 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5349 for (j=0;j<size_of_constraint;j++) { 5350 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5351 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5352 if (!PetscBTLookup(is_primal,row_B)) { 5353 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5354 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5355 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5356 } else { 5357 for (k=0;k<size_of_constraint;k++) { 5358 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5359 if (row != col) { 5360 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5361 } else { 5362 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5363 } 5364 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5365 } 5366 } 5367 } 5368 if (pcbddc->dbg_flag) { 5369 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5370 } 5371 } 5372 } else { 5373 if (pcbddc->dbg_flag) { 5374 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5375 } 5376 } 5377 } 5378 5379 /* free workspace */ 5380 if (qr_needed) { 5381 if (pcbddc->dbg_flag) { 5382 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5383 } 5384 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5385 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5386 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5387 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5388 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5389 } 5390 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5391 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5392 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5393 5394 /* assembling of global change of variable */ 5395 if (!pcbddc->fake_change) { 5396 Mat tmat; 5397 PetscInt bs; 5398 5399 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5400 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5401 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5402 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5403 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5404 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5405 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5406 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5407 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5408 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5409 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5410 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5411 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5412 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5413 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5414 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5415 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5416 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5417 5418 /* check */ 5419 if (pcbddc->dbg_flag) { 5420 PetscReal error; 5421 Vec x,x_change; 5422 5423 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5424 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5425 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5426 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5427 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5428 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5429 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5430 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5431 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5432 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5433 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5434 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5435 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5436 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5437 ierr = VecDestroy(&x);CHKERRQ(ierr); 5438 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5439 } 5440 /* adapt sub_schurs computed (if any) */ 5441 if (pcbddc->use_deluxe_scaling) { 5442 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5443 5444 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5445 if (sub_schurs && sub_schurs->S_Ej_all) { 5446 Mat S_new,tmat; 5447 IS is_all_N,is_V_Sall = NULL; 5448 5449 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5450 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5451 if (pcbddc->deluxe_zerorows) { 5452 ISLocalToGlobalMapping NtoSall; 5453 IS is_V; 5454 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5455 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5456 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5457 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5458 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5459 } 5460 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5461 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5462 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5463 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5464 if (pcbddc->deluxe_zerorows) { 5465 const PetscScalar *array; 5466 const PetscInt *idxs_V,*idxs_all; 5467 PetscInt i,n_V; 5468 5469 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5470 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5471 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5472 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5473 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5474 for (i=0;i<n_V;i++) { 5475 PetscScalar val; 5476 PetscInt idx; 5477 5478 idx = idxs_V[i]; 5479 val = array[idxs_all[idxs_V[i]]]; 5480 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5481 } 5482 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5483 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5484 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5485 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5486 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5487 } 5488 sub_schurs->S_Ej_all = S_new; 5489 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5490 if (sub_schurs->sum_S_Ej_all) { 5491 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5492 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5493 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5494 if (pcbddc->deluxe_zerorows) { 5495 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5496 } 5497 sub_schurs->sum_S_Ej_all = S_new; 5498 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5499 } 5500 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5501 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5502 } 5503 /* destroy any change of basis context in sub_schurs */ 5504 if (sub_schurs && sub_schurs->change) { 5505 PetscInt i; 5506 5507 for (i=0;i<sub_schurs->n_subs;i++) { 5508 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5509 } 5510 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5511 } 5512 } 5513 if (pcbddc->switch_static) { /* need to save the local change */ 5514 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5515 } else { 5516 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5517 } 5518 /* determine if any process has changed the pressures locally */ 5519 pcbddc->change_interior = pcbddc->benign_have_null; 5520 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5521 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5522 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5523 pcbddc->use_qr_single = qr_needed; 5524 } 5525 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5526 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5527 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5528 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5529 } else { 5530 Mat benign_global = NULL; 5531 if (pcbddc->benign_have_null) { 5532 Mat tmat; 5533 5534 pcbddc->change_interior = PETSC_TRUE; 5535 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5536 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5537 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5538 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5539 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5540 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5541 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5542 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5543 if (pcbddc->benign_change) { 5544 Mat M; 5545 5546 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5547 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5548 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5549 ierr = MatDestroy(&M);CHKERRQ(ierr); 5550 } else { 5551 Mat eye; 5552 PetscScalar *array; 5553 5554 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5555 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5556 for (i=0;i<pcis->n;i++) { 5557 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5558 } 5559 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5560 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5561 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5562 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5563 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5564 } 5565 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5566 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5567 } 5568 if (pcbddc->user_ChangeOfBasisMatrix) { 5569 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5570 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5571 } else if (pcbddc->benign_have_null) { 5572 pcbddc->ChangeOfBasisMatrix = benign_global; 5573 } 5574 } 5575 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5576 IS is_global; 5577 const PetscInt *gidxs; 5578 5579 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5580 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5581 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5582 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5583 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5584 } 5585 } 5586 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5587 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5588 } 5589 5590 if (!pcbddc->fake_change) { 5591 /* add pressure dofs to set of primal nodes for numbering purposes */ 5592 for (i=0;i<pcbddc->benign_n;i++) { 5593 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5594 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5595 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5596 pcbddc->local_primal_size_cc++; 5597 pcbddc->local_primal_size++; 5598 } 5599 5600 /* check if a new primal space has been introduced (also take into account benign trick) */ 5601 pcbddc->new_primal_space_local = PETSC_TRUE; 5602 if (olocal_primal_size == pcbddc->local_primal_size) { 5603 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5604 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5605 if (!pcbddc->new_primal_space_local) { 5606 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5607 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5608 } 5609 } 5610 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5611 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5612 } 5613 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5614 5615 /* flush dbg viewer */ 5616 if (pcbddc->dbg_flag) { 5617 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5618 } 5619 5620 /* free workspace */ 5621 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5622 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5623 if (!pcbddc->adaptive_selection) { 5624 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5625 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5626 } else { 5627 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5628 pcbddc->adaptive_constraints_idxs_ptr, 5629 pcbddc->adaptive_constraints_data_ptr, 5630 pcbddc->adaptive_constraints_idxs, 5631 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5632 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5633 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5634 } 5635 PetscFunctionReturn(0); 5636 } 5637 5638 #undef __FUNCT__ 5639 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5640 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5641 { 5642 ISLocalToGlobalMapping map; 5643 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5644 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5645 PetscInt ierr,i,N; 5646 5647 PetscFunctionBegin; 5648 if (pcbddc->graphanalyzed && !pcbddc->recompute_topography) PetscFunctionReturn(0); 5649 /* Reset previously computed graph */ 5650 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5651 /* Init local Graph struct */ 5652 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5653 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5654 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5655 5656 /* Check validity of the csr graph passed in by the user */ 5657 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5658 5659 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5660 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5661 PetscInt *xadj,*adjncy; 5662 PetscInt nvtxs; 5663 PetscBool flg_row=PETSC_FALSE; 5664 5665 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5666 if (flg_row) { 5667 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5668 pcbddc->computed_rowadj = PETSC_TRUE; 5669 } 5670 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5671 } 5672 if (pcbddc->dbg_flag) { 5673 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5674 } 5675 5676 /* Setup of Graph */ 5677 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5678 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5679 5680 /* attach info on disconnected subdomains if present */ 5681 if (pcbddc->n_local_subs) { 5682 PetscInt *local_subs; 5683 5684 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5685 for (i=0;i<pcbddc->n_local_subs;i++) { 5686 const PetscInt *idxs; 5687 PetscInt nl,j; 5688 5689 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5690 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5691 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5692 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5693 } 5694 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5695 pcbddc->mat_graph->local_subs = local_subs; 5696 } 5697 5698 /* Graph's connected components analysis */ 5699 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5700 5701 /* set flag indicating analysis has been done */ 5702 pcbddc->graphanalyzed = PETSC_TRUE; 5703 PetscFunctionReturn(0); 5704 } 5705 5706 #undef __FUNCT__ 5707 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5708 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5709 { 5710 PetscInt i,j; 5711 PetscScalar *alphas; 5712 PetscErrorCode ierr; 5713 5714 PetscFunctionBegin; 5715 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5716 for (i=0;i<n;i++) { 5717 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5718 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5719 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5720 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5721 } 5722 ierr = PetscFree(alphas);CHKERRQ(ierr); 5723 PetscFunctionReturn(0); 5724 } 5725 5726 #undef __FUNCT__ 5727 #define __FUNCT__ "MatISGetSubassemblingPattern" 5728 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5729 { 5730 Mat A; 5731 PetscInt n_neighs,*neighs,*n_shared,**shared; 5732 PetscMPIInt size,rank,color; 5733 PetscInt *xadj,*adjncy; 5734 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5735 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5736 PetscInt void_procs,*procs_candidates = NULL; 5737 PetscInt xadj_count, *count; 5738 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5739 PetscSubcomm psubcomm; 5740 MPI_Comm subcomm; 5741 PetscErrorCode ierr; 5742 5743 PetscFunctionBegin; 5744 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5745 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5746 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5747 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5748 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5749 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5750 5751 if (have_void) *have_void = PETSC_FALSE; 5752 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5753 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5754 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5755 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5756 im_active = !!(n); 5757 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5758 void_procs = size - active_procs; 5759 /* get ranks of of non-active processes in mat communicator */ 5760 if (void_procs) { 5761 PetscInt ncand; 5762 5763 if (have_void) *have_void = PETSC_TRUE; 5764 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5765 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5766 for (i=0,ncand=0;i<size;i++) { 5767 if (!procs_candidates[i]) { 5768 procs_candidates[ncand++] = i; 5769 } 5770 } 5771 /* force n_subdomains to be not greater that the number of non-active processes */ 5772 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5773 } 5774 5775 /* number of subdomains requested greater than active processes -> just shift the matrix 5776 number of subdomains requested 1 -> send to master or first candidate in voids */ 5777 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5778 PetscInt issize,isidx,dest; 5779 if (*n_subdomains == 1) dest = 0; 5780 else dest = rank; 5781 if (im_active) { 5782 issize = 1; 5783 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5784 isidx = procs_candidates[dest]; 5785 } else { 5786 isidx = dest; 5787 } 5788 } else { 5789 issize = 0; 5790 isidx = -1; 5791 } 5792 *n_subdomains = active_procs; 5793 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5794 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5795 PetscFunctionReturn(0); 5796 } 5797 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5798 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5799 threshold = PetscMax(threshold,2); 5800 5801 /* Get info on mapping */ 5802 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5803 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5804 5805 /* build local CSR graph of subdomains' connectivity */ 5806 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5807 xadj[0] = 0; 5808 xadj[1] = PetscMax(n_neighs-1,0); 5809 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5810 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5811 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5812 for (i=1;i<n_neighs;i++) 5813 for (j=0;j<n_shared[i];j++) 5814 count[shared[i][j]] += 1; 5815 5816 xadj_count = 0; 5817 for (i=1;i<n_neighs;i++) { 5818 for (j=0;j<n_shared[i];j++) { 5819 if (count[shared[i][j]] < threshold) { 5820 adjncy[xadj_count] = neighs[i]; 5821 adjncy_wgt[xadj_count] = n_shared[i]; 5822 xadj_count++; 5823 break; 5824 } 5825 } 5826 } 5827 xadj[1] = xadj_count; 5828 ierr = PetscFree(count);CHKERRQ(ierr); 5829 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5830 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5831 5832 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5833 5834 /* Restrict work on active processes only */ 5835 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5836 if (void_procs) { 5837 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5838 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5839 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5840 subcomm = PetscSubcommChild(psubcomm); 5841 } else { 5842 psubcomm = NULL; 5843 subcomm = PetscObjectComm((PetscObject)mat); 5844 } 5845 5846 v_wgt = NULL; 5847 if (!color) { 5848 ierr = PetscFree(xadj);CHKERRQ(ierr); 5849 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5850 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5851 } else { 5852 Mat subdomain_adj; 5853 IS new_ranks,new_ranks_contig; 5854 MatPartitioning partitioner; 5855 PetscInt rstart=0,rend=0; 5856 PetscInt *is_indices,*oldranks; 5857 PetscMPIInt size; 5858 PetscBool aggregate; 5859 5860 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5861 if (void_procs) { 5862 PetscInt prank = rank; 5863 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5864 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5865 for (i=0;i<xadj[1];i++) { 5866 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5867 } 5868 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5869 } else { 5870 oldranks = NULL; 5871 } 5872 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5873 if (aggregate) { /* TODO: all this part could be made more efficient */ 5874 PetscInt lrows,row,ncols,*cols; 5875 PetscMPIInt nrank; 5876 PetscScalar *vals; 5877 5878 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5879 lrows = 0; 5880 if (nrank<redprocs) { 5881 lrows = size/redprocs; 5882 if (nrank<size%redprocs) lrows++; 5883 } 5884 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5885 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5886 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5887 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5888 row = nrank; 5889 ncols = xadj[1]-xadj[0]; 5890 cols = adjncy; 5891 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5892 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5893 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5894 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5895 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5896 ierr = PetscFree(xadj);CHKERRQ(ierr); 5897 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5898 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5899 ierr = PetscFree(vals);CHKERRQ(ierr); 5900 if (use_vwgt) { 5901 Vec v; 5902 const PetscScalar *array; 5903 PetscInt nl; 5904 5905 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5906 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5907 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5908 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5909 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5910 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5911 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5912 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5913 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5914 ierr = VecDestroy(&v);CHKERRQ(ierr); 5915 } 5916 } else { 5917 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5918 if (use_vwgt) { 5919 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5920 v_wgt[0] = local_size; 5921 } 5922 } 5923 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5924 5925 /* Partition */ 5926 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5927 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5928 if (v_wgt) { 5929 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5930 } 5931 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5932 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5933 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5934 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5935 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5936 5937 /* renumber new_ranks to avoid "holes" in new set of processors */ 5938 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5939 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5940 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5941 if (!aggregate) { 5942 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5943 #if defined(PETSC_USE_DEBUG) 5944 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5945 #endif 5946 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5947 } else if (oldranks) { 5948 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5949 } else { 5950 ranks_send_to_idx[0] = is_indices[0]; 5951 } 5952 } else { 5953 PetscInt idxs[1]; 5954 PetscMPIInt tag; 5955 MPI_Request *reqs; 5956 5957 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5958 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5959 for (i=rstart;i<rend;i++) { 5960 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5961 } 5962 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5963 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5964 ierr = PetscFree(reqs);CHKERRQ(ierr); 5965 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5966 #if defined(PETSC_USE_DEBUG) 5967 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5968 #endif 5969 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5970 } else if (oldranks) { 5971 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5972 } else { 5973 ranks_send_to_idx[0] = idxs[0]; 5974 } 5975 } 5976 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5977 /* clean up */ 5978 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5979 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5980 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5981 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5982 } 5983 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5984 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5985 5986 /* assemble parallel IS for sends */ 5987 i = 1; 5988 if (!color) i=0; 5989 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5990 PetscFunctionReturn(0); 5991 } 5992 5993 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5994 5995 #undef __FUNCT__ 5996 #define __FUNCT__ "MatISSubassemble" 5997 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5998 { 5999 Mat local_mat; 6000 IS is_sends_internal; 6001 PetscInt rows,cols,new_local_rows; 6002 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6003 PetscBool ismatis,isdense,newisdense,destroy_mat; 6004 ISLocalToGlobalMapping l2gmap; 6005 PetscInt* l2gmap_indices; 6006 const PetscInt* is_indices; 6007 MatType new_local_type; 6008 /* buffers */ 6009 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6010 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6011 PetscInt *recv_buffer_idxs_local; 6012 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6013 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6014 /* MPI */ 6015 MPI_Comm comm,comm_n; 6016 PetscSubcomm subcomm; 6017 PetscMPIInt n_sends,n_recvs,commsize; 6018 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6019 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6020 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6021 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6022 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6023 PetscErrorCode ierr; 6024 6025 PetscFunctionBegin; 6026 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6027 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6028 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6029 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6030 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6031 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6032 PetscValidLogicalCollectiveBool(mat,reuse,6); 6033 PetscValidLogicalCollectiveInt(mat,nis,8); 6034 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6035 if (nvecs) { 6036 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6037 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6038 } 6039 /* further checks */ 6040 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6041 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6042 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6043 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6044 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6045 if (reuse && *mat_n) { 6046 PetscInt mrows,mcols,mnrows,mncols; 6047 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6048 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6049 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6050 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6051 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6052 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6053 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6054 } 6055 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6056 PetscValidLogicalCollectiveInt(mat,bs,0); 6057 6058 /* prepare IS for sending if not provided */ 6059 if (!is_sends) { 6060 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6061 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6062 } else { 6063 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6064 is_sends_internal = is_sends; 6065 } 6066 6067 /* get comm */ 6068 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6069 6070 /* compute number of sends */ 6071 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6072 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6073 6074 /* compute number of receives */ 6075 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6076 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6077 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6078 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6079 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6080 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6081 ierr = PetscFree(iflags);CHKERRQ(ierr); 6082 6083 /* restrict comm if requested */ 6084 subcomm = 0; 6085 destroy_mat = PETSC_FALSE; 6086 if (restrict_comm) { 6087 PetscMPIInt color,subcommsize; 6088 6089 color = 0; 6090 if (restrict_full) { 6091 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6092 } else { 6093 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6094 } 6095 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6096 subcommsize = commsize - subcommsize; 6097 /* check if reuse has been requested */ 6098 if (reuse) { 6099 if (*mat_n) { 6100 PetscMPIInt subcommsize2; 6101 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6102 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6103 comm_n = PetscObjectComm((PetscObject)*mat_n); 6104 } else { 6105 comm_n = PETSC_COMM_SELF; 6106 } 6107 } else { /* MAT_INITIAL_MATRIX */ 6108 PetscMPIInt rank; 6109 6110 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6111 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6112 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6113 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6114 comm_n = PetscSubcommChild(subcomm); 6115 } 6116 /* flag to destroy *mat_n if not significative */ 6117 if (color) destroy_mat = PETSC_TRUE; 6118 } else { 6119 comm_n = comm; 6120 } 6121 6122 /* prepare send/receive buffers */ 6123 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6124 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6125 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6126 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6127 if (nis) { 6128 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6129 } 6130 6131 /* Get data from local matrices */ 6132 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6133 /* TODO: See below some guidelines on how to prepare the local buffers */ 6134 /* 6135 send_buffer_vals should contain the raw values of the local matrix 6136 send_buffer_idxs should contain: 6137 - MatType_PRIVATE type 6138 - PetscInt size_of_l2gmap 6139 - PetscInt global_row_indices[size_of_l2gmap] 6140 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6141 */ 6142 else { 6143 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6144 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6145 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6146 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6147 send_buffer_idxs[1] = i; 6148 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6149 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6150 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6151 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6152 for (i=0;i<n_sends;i++) { 6153 ilengths_vals[is_indices[i]] = len*len; 6154 ilengths_idxs[is_indices[i]] = len+2; 6155 } 6156 } 6157 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6158 /* additional is (if any) */ 6159 if (nis) { 6160 PetscMPIInt psum; 6161 PetscInt j; 6162 for (j=0,psum=0;j<nis;j++) { 6163 PetscInt plen; 6164 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6165 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6166 psum += len+1; /* indices + lenght */ 6167 } 6168 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6169 for (j=0,psum=0;j<nis;j++) { 6170 PetscInt plen; 6171 const PetscInt *is_array_idxs; 6172 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6173 send_buffer_idxs_is[psum] = plen; 6174 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6175 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6176 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6177 psum += plen+1; /* indices + lenght */ 6178 } 6179 for (i=0;i<n_sends;i++) { 6180 ilengths_idxs_is[is_indices[i]] = psum; 6181 } 6182 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6183 } 6184 6185 buf_size_idxs = 0; 6186 buf_size_vals = 0; 6187 buf_size_idxs_is = 0; 6188 buf_size_vecs = 0; 6189 for (i=0;i<n_recvs;i++) { 6190 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6191 buf_size_vals += (PetscInt)olengths_vals[i]; 6192 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6193 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6194 } 6195 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6196 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6197 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6198 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6199 6200 /* get new tags for clean communications */ 6201 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6202 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6203 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6204 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6205 6206 /* allocate for requests */ 6207 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6208 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6209 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6210 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6211 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6212 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6213 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6214 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6215 6216 /* communications */ 6217 ptr_idxs = recv_buffer_idxs; 6218 ptr_vals = recv_buffer_vals; 6219 ptr_idxs_is = recv_buffer_idxs_is; 6220 ptr_vecs = recv_buffer_vecs; 6221 for (i=0;i<n_recvs;i++) { 6222 source_dest = onodes[i]; 6223 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6224 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6225 ptr_idxs += olengths_idxs[i]; 6226 ptr_vals += olengths_vals[i]; 6227 if (nis) { 6228 source_dest = onodes_is[i]; 6229 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6230 ptr_idxs_is += olengths_idxs_is[i]; 6231 } 6232 if (nvecs) { 6233 source_dest = onodes[i]; 6234 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6235 ptr_vecs += olengths_idxs[i]-2; 6236 } 6237 } 6238 for (i=0;i<n_sends;i++) { 6239 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6240 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6241 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6242 if (nis) { 6243 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6244 } 6245 if (nvecs) { 6246 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6247 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6248 } 6249 } 6250 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6251 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6252 6253 /* assemble new l2g map */ 6254 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6255 ptr_idxs = recv_buffer_idxs; 6256 new_local_rows = 0; 6257 for (i=0;i<n_recvs;i++) { 6258 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6259 ptr_idxs += olengths_idxs[i]; 6260 } 6261 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6262 ptr_idxs = recv_buffer_idxs; 6263 new_local_rows = 0; 6264 for (i=0;i<n_recvs;i++) { 6265 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6266 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6267 ptr_idxs += olengths_idxs[i]; 6268 } 6269 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6270 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6271 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6272 6273 /* infer new local matrix type from received local matrices type */ 6274 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6275 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6276 if (n_recvs) { 6277 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6278 ptr_idxs = recv_buffer_idxs; 6279 for (i=0;i<n_recvs;i++) { 6280 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6281 new_local_type_private = MATAIJ_PRIVATE; 6282 break; 6283 } 6284 ptr_idxs += olengths_idxs[i]; 6285 } 6286 switch (new_local_type_private) { 6287 case MATDENSE_PRIVATE: 6288 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6289 new_local_type = MATSEQAIJ; 6290 bs = 1; 6291 } else { /* if I receive only 1 dense matrix */ 6292 new_local_type = MATSEQDENSE; 6293 bs = 1; 6294 } 6295 break; 6296 case MATAIJ_PRIVATE: 6297 new_local_type = MATSEQAIJ; 6298 bs = 1; 6299 break; 6300 case MATBAIJ_PRIVATE: 6301 new_local_type = MATSEQBAIJ; 6302 break; 6303 case MATSBAIJ_PRIVATE: 6304 new_local_type = MATSEQSBAIJ; 6305 break; 6306 default: 6307 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6308 break; 6309 } 6310 } else { /* by default, new_local_type is seqdense */ 6311 new_local_type = MATSEQDENSE; 6312 bs = 1; 6313 } 6314 6315 /* create MATIS object if needed */ 6316 if (!reuse) { 6317 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6318 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6319 } else { 6320 /* it also destroys the local matrices */ 6321 if (*mat_n) { 6322 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6323 } else { /* this is a fake object */ 6324 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6325 } 6326 } 6327 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6328 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6329 6330 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6331 6332 /* Global to local map of received indices */ 6333 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6334 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6335 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6336 6337 /* restore attributes -> type of incoming data and its size */ 6338 buf_size_idxs = 0; 6339 for (i=0;i<n_recvs;i++) { 6340 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6341 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6342 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6343 } 6344 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6345 6346 /* set preallocation */ 6347 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6348 if (!newisdense) { 6349 PetscInt *new_local_nnz=0; 6350 6351 ptr_idxs = recv_buffer_idxs_local; 6352 if (n_recvs) { 6353 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6354 } 6355 for (i=0;i<n_recvs;i++) { 6356 PetscInt j; 6357 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6358 for (j=0;j<*(ptr_idxs+1);j++) { 6359 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6360 } 6361 } else { 6362 /* TODO */ 6363 } 6364 ptr_idxs += olengths_idxs[i]; 6365 } 6366 if (new_local_nnz) { 6367 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6368 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6369 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6370 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6371 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6372 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6373 } else { 6374 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6375 } 6376 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6377 } else { 6378 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6379 } 6380 6381 /* set values */ 6382 ptr_vals = recv_buffer_vals; 6383 ptr_idxs = recv_buffer_idxs_local; 6384 for (i=0;i<n_recvs;i++) { 6385 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6386 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6387 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6388 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6389 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6390 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6391 } else { 6392 /* TODO */ 6393 } 6394 ptr_idxs += olengths_idxs[i]; 6395 ptr_vals += olengths_vals[i]; 6396 } 6397 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6398 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6399 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6400 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6401 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6402 6403 #if 0 6404 if (!restrict_comm) { /* check */ 6405 Vec lvec,rvec; 6406 PetscReal infty_error; 6407 6408 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6409 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6410 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6411 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6412 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6413 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6414 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6415 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6416 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6417 } 6418 #endif 6419 6420 /* assemble new additional is (if any) */ 6421 if (nis) { 6422 PetscInt **temp_idxs,*count_is,j,psum; 6423 6424 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6425 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6426 ptr_idxs = recv_buffer_idxs_is; 6427 psum = 0; 6428 for (i=0;i<n_recvs;i++) { 6429 for (j=0;j<nis;j++) { 6430 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6431 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6432 psum += plen; 6433 ptr_idxs += plen+1; /* shift pointer to received data */ 6434 } 6435 } 6436 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6437 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6438 for (i=1;i<nis;i++) { 6439 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6440 } 6441 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6442 ptr_idxs = recv_buffer_idxs_is; 6443 for (i=0;i<n_recvs;i++) { 6444 for (j=0;j<nis;j++) { 6445 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6446 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6447 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6448 ptr_idxs += plen+1; /* shift pointer to received data */ 6449 } 6450 } 6451 for (i=0;i<nis;i++) { 6452 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6453 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6454 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6455 } 6456 ierr = PetscFree(count_is);CHKERRQ(ierr); 6457 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6458 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6459 } 6460 /* free workspace */ 6461 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6462 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6463 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6464 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6465 if (isdense) { 6466 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6467 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6468 } else { 6469 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6470 } 6471 if (nis) { 6472 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6473 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6474 } 6475 6476 if (nvecs) { 6477 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6478 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6479 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6480 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6481 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6482 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6483 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6484 /* set values */ 6485 ptr_vals = recv_buffer_vecs; 6486 ptr_idxs = recv_buffer_idxs_local; 6487 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6488 for (i=0;i<n_recvs;i++) { 6489 PetscInt j; 6490 for (j=0;j<*(ptr_idxs+1);j++) { 6491 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6492 } 6493 ptr_idxs += olengths_idxs[i]; 6494 ptr_vals += olengths_idxs[i]-2; 6495 } 6496 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6497 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6498 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6499 } 6500 6501 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6502 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6503 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6504 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6505 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6506 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6507 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6508 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6509 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6510 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6511 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6512 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6513 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6514 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6515 ierr = PetscFree(onodes);CHKERRQ(ierr); 6516 if (nis) { 6517 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6518 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6519 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6520 } 6521 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6522 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6523 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6524 for (i=0;i<nis;i++) { 6525 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6526 } 6527 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6528 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6529 } 6530 *mat_n = NULL; 6531 } 6532 PetscFunctionReturn(0); 6533 } 6534 6535 /* temporary hack into ksp private data structure */ 6536 #include <petsc/private/kspimpl.h> 6537 6538 #undef __FUNCT__ 6539 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6540 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6541 { 6542 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6543 PC_IS *pcis = (PC_IS*)pc->data; 6544 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6545 Mat coarsedivudotp = NULL; 6546 MatNullSpace CoarseNullSpace = NULL; 6547 ISLocalToGlobalMapping coarse_islg; 6548 IS coarse_is,*isarray; 6549 PetscInt i,im_active=-1,active_procs=-1; 6550 PetscInt nis,nisdofs,nisneu,nisvert; 6551 PC pc_temp; 6552 PCType coarse_pc_type; 6553 KSPType coarse_ksp_type; 6554 PetscBool multilevel_requested,multilevel_allowed; 6555 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6556 Mat t_coarse_mat_is; 6557 PetscInt ncoarse; 6558 PetscBool compute_vecs = PETSC_FALSE; 6559 PetscScalar *array; 6560 MatReuse coarse_mat_reuse; 6561 PetscBool restr, full_restr, have_void; 6562 PetscErrorCode ierr; 6563 6564 PetscFunctionBegin; 6565 /* Assign global numbering to coarse dofs */ 6566 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6567 PetscInt ocoarse_size; 6568 compute_vecs = PETSC_TRUE; 6569 ocoarse_size = pcbddc->coarse_size; 6570 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6571 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6572 /* see if we can avoid some work */ 6573 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6574 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6575 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6576 PC pc; 6577 PetscBool isbddc; 6578 6579 /* temporary workaround since PCBDDC does not have a reset method so far */ 6580 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6581 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6582 if (isbddc) { 6583 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6584 } else { 6585 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6586 } 6587 coarse_reuse = PETSC_FALSE; 6588 } else { /* we can safely reuse already computed coarse matrix */ 6589 coarse_reuse = PETSC_TRUE; 6590 } 6591 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6592 coarse_reuse = PETSC_FALSE; 6593 } 6594 /* reset any subassembling information */ 6595 if (!coarse_reuse || pcbddc->recompute_topography) { 6596 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6597 } 6598 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6599 coarse_reuse = PETSC_TRUE; 6600 } 6601 /* assemble coarse matrix */ 6602 if (coarse_reuse && pcbddc->coarse_ksp) { 6603 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6604 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6605 coarse_mat_reuse = MAT_REUSE_MATRIX; 6606 } else { 6607 coarse_mat = NULL; 6608 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6609 } 6610 6611 /* creates temporary l2gmap and IS for coarse indexes */ 6612 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6613 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6614 6615 /* creates temporary MATIS object for coarse matrix */ 6616 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6617 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6618 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6619 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6620 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6621 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6622 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6623 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6624 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6625 6626 /* count "active" (i.e. with positive local size) and "void" processes */ 6627 im_active = !!(pcis->n); 6628 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6629 6630 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6631 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6632 /* full_restr : just use the receivers from the subassembling pattern */ 6633 coarse_mat_is = NULL; 6634 multilevel_allowed = PETSC_FALSE; 6635 multilevel_requested = PETSC_FALSE; 6636 full_restr = PETSC_TRUE; 6637 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6638 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6639 if (multilevel_requested) { 6640 ncoarse = active_procs/pcbddc->coarsening_ratio; 6641 restr = PETSC_FALSE; 6642 full_restr = PETSC_FALSE; 6643 } else { 6644 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6645 restr = PETSC_TRUE; 6646 full_restr = PETSC_TRUE; 6647 } 6648 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6649 ncoarse = PetscMax(1,ncoarse); 6650 if (!pcbddc->coarse_subassembling) { 6651 if (pcbddc->coarsening_ratio > 1) { 6652 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6653 } else { 6654 PetscMPIInt size,rank; 6655 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6656 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6657 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6658 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6659 } 6660 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6661 PetscInt psum; 6662 PetscMPIInt size; 6663 if (pcbddc->coarse_ksp) psum = 1; 6664 else psum = 0; 6665 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6666 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6667 if (ncoarse < size) have_void = PETSC_TRUE; 6668 } 6669 /* determine if we can go multilevel */ 6670 if (multilevel_requested) { 6671 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6672 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6673 } 6674 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6675 6676 /* dump subassembling pattern */ 6677 if (pcbddc->dbg_flag && multilevel_allowed) { 6678 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6679 } 6680 6681 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6682 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6683 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6684 const PetscInt *idxs; 6685 ISLocalToGlobalMapping tmap; 6686 6687 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6688 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6689 /* allocate space for temporary storage */ 6690 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6691 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6692 /* allocate for IS array */ 6693 nisdofs = pcbddc->n_ISForDofsLocal; 6694 nisneu = !!pcbddc->NeumannBoundariesLocal; 6695 nisvert = 0; /* nisvert is not used */ 6696 nis = nisdofs + nisneu + nisvert; 6697 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6698 /* dofs splitting */ 6699 for (i=0;i<nisdofs;i++) { 6700 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6701 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6702 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6703 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6704 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6705 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6706 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6707 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6708 } 6709 /* neumann boundaries */ 6710 if (pcbddc->NeumannBoundariesLocal) { 6711 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6712 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6713 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6714 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6715 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6716 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6717 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6718 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6719 } 6720 /* free memory */ 6721 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6722 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6723 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6724 } else { 6725 nis = 0; 6726 nisdofs = 0; 6727 nisneu = 0; 6728 nisvert = 0; 6729 isarray = NULL; 6730 } 6731 /* destroy no longer needed map */ 6732 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6733 6734 /* subassemble */ 6735 if (multilevel_allowed) { 6736 Vec vp[1]; 6737 PetscInt nvecs = 0; 6738 PetscBool reuse,reuser; 6739 6740 if (coarse_mat) reuse = PETSC_TRUE; 6741 else reuse = PETSC_FALSE; 6742 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6743 vp[0] = NULL; 6744 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6745 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6746 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6747 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6748 nvecs = 1; 6749 6750 if (pcbddc->divudotp) { 6751 Mat B,loc_divudotp; 6752 Vec v,p; 6753 IS dummy; 6754 PetscInt np; 6755 6756 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6757 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6758 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6759 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6760 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6761 ierr = VecSet(p,1.);CHKERRQ(ierr); 6762 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6763 ierr = VecDestroy(&p);CHKERRQ(ierr); 6764 ierr = MatDestroy(&B);CHKERRQ(ierr); 6765 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6766 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6767 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6768 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6769 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6770 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6771 ierr = VecDestroy(&v);CHKERRQ(ierr); 6772 } 6773 } 6774 if (reuser) { 6775 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6776 } else { 6777 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6778 } 6779 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6780 PetscScalar *arraym,*arrayv; 6781 PetscInt nl; 6782 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6783 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6784 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6785 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 6786 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 6787 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 6788 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6789 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 6790 } else { 6791 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 6792 } 6793 } else { 6794 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 6795 } 6796 if (coarse_mat_is || coarse_mat) { 6797 PetscMPIInt size; 6798 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 6799 if (!multilevel_allowed) { 6800 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 6801 } else { 6802 Mat A; 6803 6804 /* if this matrix is present, it means we are not reusing the coarse matrix */ 6805 if (coarse_mat_is) { 6806 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 6807 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 6808 coarse_mat = coarse_mat_is; 6809 } 6810 /* be sure we don't have MatSeqDENSE as local mat */ 6811 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 6812 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 6813 } 6814 } 6815 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 6816 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 6817 6818 /* create local to global scatters for coarse problem */ 6819 if (compute_vecs) { 6820 PetscInt lrows; 6821 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 6822 if (coarse_mat) { 6823 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 6824 } else { 6825 lrows = 0; 6826 } 6827 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 6828 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 6829 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 6830 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6831 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6832 } 6833 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 6834 6835 /* set defaults for coarse KSP and PC */ 6836 if (multilevel_allowed) { 6837 coarse_ksp_type = KSPRICHARDSON; 6838 coarse_pc_type = PCBDDC; 6839 } else { 6840 coarse_ksp_type = KSPPREONLY; 6841 coarse_pc_type = PCREDUNDANT; 6842 } 6843 6844 /* print some info if requested */ 6845 if (pcbddc->dbg_flag) { 6846 if (!multilevel_allowed) { 6847 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6848 if (multilevel_requested) { 6849 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6850 } else if (pcbddc->max_levels) { 6851 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6852 } 6853 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6854 } 6855 } 6856 6857 /* create the coarse KSP object only once with defaults */ 6858 if (coarse_mat) { 6859 PetscViewer dbg_viewer = NULL; 6860 if (pcbddc->dbg_flag) { 6861 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6862 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6863 } 6864 if (!pcbddc->coarse_ksp) { 6865 char prefix[256],str_level[16]; 6866 size_t len; 6867 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6868 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6869 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6870 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6871 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6872 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6873 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6874 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6875 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6876 /* prefix */ 6877 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6878 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6879 if (!pcbddc->current_level) { 6880 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6881 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6882 } else { 6883 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6884 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6885 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6886 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6887 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6888 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6889 } 6890 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6891 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6892 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6893 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6894 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6895 /* allow user customization */ 6896 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6897 } 6898 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6899 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6900 if (nisdofs) { 6901 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6902 for (i=0;i<nisdofs;i++) { 6903 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6904 } 6905 } 6906 if (nisneu) { 6907 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6908 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6909 } 6910 if (nisvert) { 6911 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6912 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6913 } 6914 6915 /* get some info after set from options */ 6916 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6917 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6918 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6919 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6920 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6921 isbddc = PETSC_FALSE; 6922 } 6923 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6924 if (isredundant) { 6925 KSP inner_ksp; 6926 PC inner_pc; 6927 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6928 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6929 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6930 } 6931 6932 /* parameters which miss an API */ 6933 if (isbddc) { 6934 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6935 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6936 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6937 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6938 if (pcbddc_coarse->benign_saddle_point) { 6939 Mat coarsedivudotp_is; 6940 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6941 IS row,col; 6942 const PetscInt *gidxs; 6943 PetscInt n,st,M,N; 6944 6945 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6946 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6947 st = st-n; 6948 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6949 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6950 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6951 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6952 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6953 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6954 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6955 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6956 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6957 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6958 ierr = ISDestroy(&row);CHKERRQ(ierr); 6959 ierr = ISDestroy(&col);CHKERRQ(ierr); 6960 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6961 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6962 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6963 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6964 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6965 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6966 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6967 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6968 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6969 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6970 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6971 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6972 } 6973 } 6974 6975 /* propagate symmetry info of coarse matrix */ 6976 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6977 if (pc->pmat->symmetric_set) { 6978 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6979 } 6980 if (pc->pmat->hermitian_set) { 6981 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6982 } 6983 if (pc->pmat->spd_set) { 6984 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6985 } 6986 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6987 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6988 } 6989 /* set operators */ 6990 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6991 if (pcbddc->dbg_flag) { 6992 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6993 } 6994 } 6995 ierr = PetscFree(isarray);CHKERRQ(ierr); 6996 #if 0 6997 { 6998 PetscViewer viewer; 6999 char filename[256]; 7000 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7001 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7002 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7003 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7004 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7005 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7006 } 7007 #endif 7008 7009 if (pcbddc->coarse_ksp) { 7010 Vec crhs,csol; 7011 7012 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7013 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7014 if (!csol) { 7015 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7016 } 7017 if (!crhs) { 7018 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7019 } 7020 } 7021 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7022 7023 /* compute null space for coarse solver if the benign trick has been requested */ 7024 if (pcbddc->benign_null) { 7025 7026 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7027 for (i=0;i<pcbddc->benign_n;i++) { 7028 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7029 } 7030 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7031 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7032 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7033 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7034 if (coarse_mat) { 7035 Vec nullv; 7036 PetscScalar *array,*array2; 7037 PetscInt nl; 7038 7039 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7040 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7041 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7042 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7043 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7044 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7045 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7046 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7047 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7048 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7049 } 7050 } 7051 7052 if (pcbddc->coarse_ksp) { 7053 PetscBool ispreonly; 7054 7055 if (CoarseNullSpace) { 7056 PetscBool isnull; 7057 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7058 if (isnull) { 7059 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7060 } 7061 /* TODO: add local nullspaces (if any) */ 7062 } 7063 /* setup coarse ksp */ 7064 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7065 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7066 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7067 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7068 KSP check_ksp; 7069 KSPType check_ksp_type; 7070 PC check_pc; 7071 Vec check_vec,coarse_vec; 7072 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7073 PetscInt its; 7074 PetscBool compute_eigs; 7075 PetscReal *eigs_r,*eigs_c; 7076 PetscInt neigs; 7077 const char *prefix; 7078 7079 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7080 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7081 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7082 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7083 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7084 /* prevent from setup unneeded object */ 7085 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7086 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7087 if (ispreonly) { 7088 check_ksp_type = KSPPREONLY; 7089 compute_eigs = PETSC_FALSE; 7090 } else { 7091 check_ksp_type = KSPGMRES; 7092 compute_eigs = PETSC_TRUE; 7093 } 7094 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7095 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7096 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7097 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7098 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7099 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7100 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7101 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7102 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7103 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7104 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7105 /* create random vec */ 7106 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7107 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7108 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7109 /* solve coarse problem */ 7110 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7111 /* set eigenvalue estimation if preonly has not been requested */ 7112 if (compute_eigs) { 7113 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7114 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7115 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7116 if (neigs) { 7117 lambda_max = eigs_r[neigs-1]; 7118 lambda_min = eigs_r[0]; 7119 if (pcbddc->use_coarse_estimates) { 7120 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7121 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7122 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7123 } 7124 } 7125 } 7126 } 7127 7128 /* check coarse problem residual error */ 7129 if (pcbddc->dbg_flag) { 7130 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7131 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7132 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7133 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7134 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7135 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7136 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7137 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7138 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7139 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7140 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7141 if (CoarseNullSpace) { 7142 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7143 } 7144 if (compute_eigs) { 7145 PetscReal lambda_max_s,lambda_min_s; 7146 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7147 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7148 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7149 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7150 for (i=0;i<neigs;i++) { 7151 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7152 } 7153 } 7154 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7155 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7156 } 7157 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7158 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7159 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7160 if (compute_eigs) { 7161 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7162 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7163 } 7164 } 7165 } 7166 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7167 /* print additional info */ 7168 if (pcbddc->dbg_flag) { 7169 /* waits until all processes reaches this point */ 7170 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7171 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7172 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7173 } 7174 7175 /* free memory */ 7176 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7177 PetscFunctionReturn(0); 7178 } 7179 7180 #undef __FUNCT__ 7181 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7182 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7183 { 7184 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7185 PC_IS* pcis = (PC_IS*)pc->data; 7186 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7187 IS subset,subset_mult,subset_n; 7188 PetscInt local_size,coarse_size=0; 7189 PetscInt *local_primal_indices=NULL; 7190 const PetscInt *t_local_primal_indices; 7191 PetscErrorCode ierr; 7192 7193 PetscFunctionBegin; 7194 /* Compute global number of coarse dofs */ 7195 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7196 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7197 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7198 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7199 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7200 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7201 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7202 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7203 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7204 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7205 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7206 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7207 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7208 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7209 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7210 7211 /* check numbering */ 7212 if (pcbddc->dbg_flag) { 7213 PetscScalar coarsesum,*array,*array2; 7214 PetscInt i; 7215 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7216 7217 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7219 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7220 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7221 /* counter */ 7222 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7223 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7224 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7225 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7226 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7227 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7228 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7229 for (i=0;i<pcbddc->local_primal_size;i++) { 7230 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7231 } 7232 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7233 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7234 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7235 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7236 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7237 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7238 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7239 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7240 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7241 for (i=0;i<pcis->n;i++) { 7242 if (array[i] != 0.0 && array[i] != array2[i]) { 7243 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7244 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7245 set_error = PETSC_TRUE; 7246 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7247 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7248 } 7249 } 7250 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7251 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7252 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7253 for (i=0;i<pcis->n;i++) { 7254 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7255 } 7256 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7257 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7258 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7259 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7260 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7261 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7262 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7263 PetscInt *gidxs; 7264 7265 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7266 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7267 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7268 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7270 for (i=0;i<pcbddc->local_primal_size;i++) { 7271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7272 } 7273 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7274 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7275 } 7276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7277 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7278 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7279 } 7280 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7281 /* get back data */ 7282 *coarse_size_n = coarse_size; 7283 *local_primal_indices_n = local_primal_indices; 7284 PetscFunctionReturn(0); 7285 } 7286 7287 #undef __FUNCT__ 7288 #define __FUNCT__ "PCBDDCGlobalToLocal" 7289 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7290 { 7291 IS localis_t; 7292 PetscInt i,lsize,*idxs,n; 7293 PetscScalar *vals; 7294 PetscErrorCode ierr; 7295 7296 PetscFunctionBegin; 7297 /* get indices in local ordering exploiting local to global map */ 7298 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7299 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7300 for (i=0;i<lsize;i++) vals[i] = 1.0; 7301 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7302 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7303 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7304 if (idxs) { /* multilevel guard */ 7305 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7306 } 7307 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7308 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7309 ierr = PetscFree(vals);CHKERRQ(ierr); 7310 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7311 /* now compute set in local ordering */ 7312 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7313 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7314 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7315 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7316 for (i=0,lsize=0;i<n;i++) { 7317 if (PetscRealPart(vals[i]) > 0.5) { 7318 lsize++; 7319 } 7320 } 7321 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7322 for (i=0,lsize=0;i<n;i++) { 7323 if (PetscRealPart(vals[i]) > 0.5) { 7324 idxs[lsize++] = i; 7325 } 7326 } 7327 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7328 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7329 *localis = localis_t; 7330 PetscFunctionReturn(0); 7331 } 7332 7333 #undef __FUNCT__ 7334 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7335 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7336 { 7337 PC_IS *pcis=(PC_IS*)pc->data; 7338 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7339 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7340 Mat S_j; 7341 PetscInt *used_xadj,*used_adjncy; 7342 PetscBool free_used_adj; 7343 PetscErrorCode ierr; 7344 7345 PetscFunctionBegin; 7346 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7347 free_used_adj = PETSC_FALSE; 7348 if (pcbddc->sub_schurs_layers == -1) { 7349 used_xadj = NULL; 7350 used_adjncy = NULL; 7351 } else { 7352 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7353 used_xadj = pcbddc->mat_graph->xadj; 7354 used_adjncy = pcbddc->mat_graph->adjncy; 7355 } else if (pcbddc->computed_rowadj) { 7356 used_xadj = pcbddc->mat_graph->xadj; 7357 used_adjncy = pcbddc->mat_graph->adjncy; 7358 } else { 7359 PetscBool flg_row=PETSC_FALSE; 7360 const PetscInt *xadj,*adjncy; 7361 PetscInt nvtxs; 7362 7363 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7364 if (flg_row) { 7365 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7366 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7367 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7368 free_used_adj = PETSC_TRUE; 7369 } else { 7370 pcbddc->sub_schurs_layers = -1; 7371 used_xadj = NULL; 7372 used_adjncy = NULL; 7373 } 7374 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7375 } 7376 } 7377 7378 /* setup sub_schurs data */ 7379 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7380 if (!sub_schurs->schur_explicit) { 7381 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7382 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7383 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7384 } else { 7385 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7386 PetscBool isseqaij,need_change = PETSC_FALSE; 7387 PetscInt benign_n; 7388 Mat change = NULL; 7389 Vec scaling = NULL; 7390 IS change_primal = NULL; 7391 7392 if (!pcbddc->use_vertices && reuse_solvers) { 7393 PetscInt n_vertices; 7394 7395 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7396 reuse_solvers = (PetscBool)!n_vertices; 7397 } 7398 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7399 if (!isseqaij) { 7400 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7401 if (matis->A == pcbddc->local_mat) { 7402 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7403 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7404 } else { 7405 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7406 } 7407 } 7408 if (!pcbddc->benign_change_explicit) { 7409 benign_n = pcbddc->benign_n; 7410 } else { 7411 benign_n = 0; 7412 } 7413 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7414 We need a global reduction to avoid possible deadlocks. 7415 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7416 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7417 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7418 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7419 need_change = (PetscBool)(!need_change); 7420 } 7421 /* If the user defines additional constraints, we import them here. 7422 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7423 if (need_change) { 7424 PC_IS *pcisf; 7425 PC_BDDC *pcbddcf; 7426 PC pcf; 7427 7428 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7429 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7430 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7431 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7432 /* hacks */ 7433 pcisf = (PC_IS*)pcf->data; 7434 pcisf->is_B_local = pcis->is_B_local; 7435 pcisf->vec1_N = pcis->vec1_N; 7436 pcisf->BtoNmap = pcis->BtoNmap; 7437 pcisf->n = pcis->n; 7438 pcisf->n_B = pcis->n_B; 7439 pcbddcf = (PC_BDDC*)pcf->data; 7440 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7441 pcbddcf->mat_graph = pcbddc->mat_graph; 7442 pcbddcf->use_faces = PETSC_TRUE; 7443 pcbddcf->use_change_of_basis = PETSC_TRUE; 7444 pcbddcf->use_change_on_faces = PETSC_TRUE; 7445 pcbddcf->use_qr_single = PETSC_TRUE; 7446 pcbddcf->fake_change = PETSC_TRUE; 7447 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7448 /* store information on primal vertices and change of basis (in local numbering) */ 7449 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7450 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7451 change = pcbddcf->ConstraintMatrix; 7452 pcbddcf->ConstraintMatrix = NULL; 7453 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7454 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7455 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7456 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7457 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7458 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7459 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7460 pcf->ops->destroy = NULL; 7461 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7462 } 7463 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7464 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7465 ierr = MatDestroy(&change);CHKERRQ(ierr); 7466 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7467 } 7468 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7469 7470 /* free adjacency */ 7471 if (free_used_adj) { 7472 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7473 } 7474 PetscFunctionReturn(0); 7475 } 7476 7477 #undef __FUNCT__ 7478 #define __FUNCT__ "PCBDDCInitSubSchurs" 7479 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7480 { 7481 PC_IS *pcis=(PC_IS*)pc->data; 7482 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7483 PCBDDCGraph graph; 7484 PetscErrorCode ierr; 7485 7486 PetscFunctionBegin; 7487 /* attach interface graph for determining subsets */ 7488 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7489 IS verticesIS,verticescomm; 7490 PetscInt vsize,*idxs; 7491 7492 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7493 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7494 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7495 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7496 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7497 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7498 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7499 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7500 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7501 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7502 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7503 } else { 7504 graph = pcbddc->mat_graph; 7505 } 7506 /* print some info */ 7507 if (pcbddc->dbg_flag) { 7508 IS vertices; 7509 PetscInt nv,nedges,nfaces; 7510 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7511 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7512 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7513 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7514 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7515 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7516 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7518 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7519 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7520 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7521 } 7522 7523 /* sub_schurs init */ 7524 if (!pcbddc->sub_schurs) { 7525 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7526 } 7527 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7528 7529 /* free graph struct */ 7530 if (pcbddc->sub_schurs_rebuild) { 7531 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7532 } 7533 PetscFunctionReturn(0); 7534 } 7535 7536 #undef __FUNCT__ 7537 #define __FUNCT__ "PCBDDCCheckOperator" 7538 PetscErrorCode PCBDDCCheckOperator(PC pc) 7539 { 7540 PC_IS *pcis=(PC_IS*)pc->data; 7541 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7542 PetscErrorCode ierr; 7543 7544 PetscFunctionBegin; 7545 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7546 IS zerodiag = NULL; 7547 Mat S_j,B0_B=NULL; 7548 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7549 PetscScalar *p0_check,*array,*array2; 7550 PetscReal norm; 7551 PetscInt i; 7552 7553 /* B0 and B0_B */ 7554 if (zerodiag) { 7555 IS dummy; 7556 7557 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7558 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7559 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7560 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7561 } 7562 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7563 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7564 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7565 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7566 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7567 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7568 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7569 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7570 /* S_j */ 7571 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7572 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7573 7574 /* mimic vector in \widetilde{W}_\Gamma */ 7575 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7576 /* continuous in primal space */ 7577 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7578 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7579 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7580 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7581 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7582 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7583 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7584 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7585 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7586 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7587 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7588 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7589 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7590 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7591 7592 /* assemble rhs for coarse problem */ 7593 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7594 /* local with Schur */ 7595 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7596 if (zerodiag) { 7597 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7598 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7599 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7600 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7601 } 7602 /* sum on primal nodes the local contributions */ 7603 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7604 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7605 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7606 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7607 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7608 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7609 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7610 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7611 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7612 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7613 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7614 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7615 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7616 /* scale primal nodes (BDDC sums contibutions) */ 7617 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7618 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7619 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7620 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7621 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7622 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7623 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7624 /* global: \widetilde{B0}_B w_\Gamma */ 7625 if (zerodiag) { 7626 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7627 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7628 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7629 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7630 } 7631 /* BDDC */ 7632 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7633 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7634 7635 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7636 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7637 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7638 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7639 for (i=0;i<pcbddc->benign_n;i++) { 7640 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7641 } 7642 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7643 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7644 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7645 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7646 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7647 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7648 } 7649 PetscFunctionReturn(0); 7650 } 7651