1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 84 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 85 86 /* gradients */ 87 ptr = work + 5*esize; 88 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 89 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 90 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 91 ierr = MatDestroy(&GE);CHKERRQ(ierr); 92 93 /* constants */ 94 ptr += rsize*csize; 95 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 96 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 97 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 98 ierr = MatDestroy(&GE);CHKERRQ(ierr); 99 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 100 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "PCBDDCNedelecSupport" 106 PetscErrorCode PCBDDCNedelecSupport(PC pc) 107 { 108 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 109 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 110 Mat G,T,conn,lG,lGt,lGis,lGall,lGe; 111 MatNullSpace nnsp; 112 Vec tvec,*quads; 113 PetscSF sfv; 114 ISLocalToGlobalMapping el2g,vl2g; 115 MPI_Comm comm; 116 IS lned,primals; 117 IS *eedges,*extrows,*extcols; 118 PetscBT btv,bte,btvc,btb,btvcand,btvi; 119 PetscScalar *vals,*work; 120 PetscReal *rwork; 121 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 122 PetscInt ne,nv,Ne,Nv,Le,Lv,order; 123 PetscInt n_neigh,*neigh,*n_shared,**shared; 124 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 125 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 126 PetscInt *sfvleaves,*sfvroots; 127 PetscBool print,eerr,done,lrc[2],conforming,ismpiaij; 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 /* test variable order code and print debug info TODO: to be removed */ 132 print = PETSC_FALSE; 133 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 134 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 135 136 /* Return to caller if there are no edges in the decomposition */ 137 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 138 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 139 ierr = ISLocalToGlobalMappingGetSize(el2g,&ne);CHKERRQ(ierr); 140 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 141 lrc[0] = PETSC_FALSE; 142 for (i=0;i<ne;i++) { 143 if (PetscRealPart(vals[i]) > 2.) { 144 lrc[0] = PETSC_TRUE; 145 break; 146 } 147 } 148 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 149 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 150 if (!lrc[1]) PetscFunctionReturn(0); 151 152 /* Get relevant objects */ 153 G = pcbddc->discretegradient; 154 order = pcbddc->nedorder; 155 conforming = pcbddc->conforming; 156 157 /* Compute local and global sizes of egde dofs and nodal dofs */ 158 ierr = MatGetSize(G,&Ne,&Nv);CHKERRQ(ierr); 159 ierr = MatGetLocalSize(G,&Le,&Lv);CHKERRQ(ierr); 160 161 /* Sanity checks */ 162 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 163 ierr = PetscObjectTypeCompare((PetscObject)G,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 164 if (!ismpiaij) SETERRQ1(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with discrete gradient of type %s. Please use MPIAIJ",((PetscObject)G)->type_name); 165 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 166 if (order && Ne%order) SETERRQ2(comm,PETSC_ERR_USER,"The number of edge dofs %d it's not a multiple of the order %d",Ne,order); 167 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 168 ierr = MatGetSize(pc->pmat,&i,NULL);CHKERRQ(ierr); 169 if (Ne != i) SETERRQ2(comm,PETSC_ERR_USER,"Global matrix sizes should match! Number of rows of G %d differs from that of A %d",Ne,i); 170 ierr = MatGetLocalSize(pc->pmat,&i,NULL);CHKERRQ(ierr); 171 if (Le != i) SETERRQ2(comm,PETSC_ERR_USER,"Local matrix sizes should match! Number of local rows of G %d differs from that of A %d",Le,i); 172 173 /* Drop connections for interior edges (this modifies G) */ 174 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 175 ierr = VecScatterBegin(matis->rctx,matis->counter,tvec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 176 ierr = VecScatterEnd(matis->rctx,matis->counter,tvec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 177 ierr = PetscMalloc1(Le,&marks);CHKERRQ(ierr); 178 ierr = VecGetOwnershipRange(tvec,&rst,NULL);CHKERRQ(ierr); 179 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&vals);CHKERRQ(ierr); 180 cum = 0; 181 for (i=0;i<Le;i++) if (PetscRealPart(vals[i]) < 1.5) marks[cum++] = i+rst; 182 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&vals);CHKERRQ(ierr); 183 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 184 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 185 ierr = MatZeroRows(G,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 186 ierr = PetscFree(marks);CHKERRQ(ierr); 187 188 /* Extract subdomain relevant rows of G */ 189 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 190 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 191 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 193 ierr = ISDestroy(&lned);CHKERRQ(ierr); 194 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 195 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 196 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 197 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 198 if (print) { 199 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 200 ierr = MatView(lG,NULL);CHKERRQ(ierr); 201 } 202 203 /* SF for nodal communications */ 204 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 205 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 207 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 208 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 209 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 210 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 211 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 212 213 /* Destroy temporary G created in MATIS format */ 214 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 215 216 /* Analyze the edge-nodes connections (duplicate lG) */ 217 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 218 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 219 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 220 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 221 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 222 /* need to import the boundary specification to ensure the 223 proper detection of coarse edges' endpoints */ 224 if (pcbddc->DirichletBoundariesLocal) { 225 ierr = ISGetLocalSize(pcbddc->DirichletBoundariesLocal,&cum);CHKERRQ(ierr); 226 ierr = ISGetIndices(pcbddc->DirichletBoundariesLocal,&idxs);CHKERRQ(ierr); 227 for (i=0;i<cum;i++) { 228 if (idxs[i] >= 0) { 229 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 230 } 231 } 232 ierr = ISRestoreIndices(pcbddc->DirichletBoundariesLocal,&idxs);CHKERRQ(ierr); 233 } 234 if (pcbddc->NeumannBoundariesLocal) { 235 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&cum);CHKERRQ(ierr); 236 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 237 for (i=0;i<cum;i++) { 238 if (idxs[i] >= 0) { 239 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 240 } 241 } 242 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 243 } 244 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 245 /* need to remove coarse faces' dofs to ensure the 246 proper detection of coarse edges' endpoints */ 247 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 248 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 249 for (i=1;i<n_neigh;i++) { 250 PetscInt j; 251 for (j=0;j<n_shared[i];j++) marks[shared[i][j]]++; 252 } 253 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 254 cum = 0; 255 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 256 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 257 for (i=0;i<ne;i++) { 258 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 259 if (!marks[i] || (marks[i] == 1 && !PetscBTLookup(btb,i))) { 260 marks[cum++] = i; 261 continue; 262 } 263 /* set badly connected edge dofs as primal */ 264 if (!conforming && ii[i+1]-ii[i] != order + 1) { 265 marks[cum++] = i; 266 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 267 for (j=ii[i];j<ii[i+1];j++) { 268 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 269 } 270 } 271 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 272 if (!order && ii[i+1] != ii[i]) { 273 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 274 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 275 } 276 } 277 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 278 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 279 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 280 /* identify splitpoints and corner candidates: TODO variable order */ 281 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 282 if (print) { 283 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 284 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 285 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 286 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 287 } 288 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 289 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 290 for (i=0;i<nv;i++) { 291 PetscInt ord = order, test = ii[i+1]-ii[i]; 292 if (!order) { 293 PetscReal vorder = 0.; 294 295 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 296 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 297 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 298 ord = 1; 299 } 300 #if defined(PETSC_USE_DEBUG) 301 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 302 #endif 303 if (test >= 3*ord) { /* splitpoints */ 304 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 305 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 306 } else if (test == ord) { 307 if (order == 1) { 308 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 309 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 310 } else { 311 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 312 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 313 } 314 } 315 } 316 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 317 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 318 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 319 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 320 321 /* Get the local G^T explicitly */ 322 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 323 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 324 if (print) { 325 ierr = PetscObjectSetName((PetscObject)lGt,"initial_lGt");CHKERRQ(ierr); 326 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 327 } 328 329 /* Mark interior nodal dofs */ 330 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 331 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 332 for (i=1;i<n_neigh;i++) { 333 for (j=0;j<n_shared[i];j++) { 334 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 335 } 336 } 337 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 338 339 /* communicate corners and splitpoints */ 340 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 341 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 342 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 343 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 344 345 if (print) { 346 IS tbz; 347 348 cum = 0; 349 for (i=0;i<nv;i++) 350 if (sfvleaves[i]) 351 vmarks[cum++] = i; 352 353 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 354 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 355 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 356 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 357 } 358 359 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 360 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 361 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 362 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 363 364 /* Zero rows of lGt corresponding to identified corners 365 and interior nodal dofs */ 366 cum = 0; 367 for (i=0;i<nv;i++) { 368 if (sfvleaves[i]) { 369 vmarks[cum++] = i; 370 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 371 } 372 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 373 } 374 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 375 if (print) { 376 IS tbz; 377 378 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 379 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 380 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 381 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 382 } 383 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 384 ierr = PetscFree(vmarks);CHKERRQ(ierr); 385 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 386 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 387 388 /* Recompute G */ 389 ierr = MatDestroy(&lG);CHKERRQ(ierr); 390 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 391 if (print) { 392 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 393 ierr = MatView(lG,NULL);CHKERRQ(ierr); 394 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 395 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 396 } 397 398 /* Get primal dofs (if any) */ 399 cum = 0; 400 for (i=0;i<ne;i++) { 401 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 402 } 403 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 404 if (print) { 405 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 406 ierr = ISView(primals,NULL);CHKERRQ(ierr); 407 } 408 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 409 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 410 ierr = ISDestroy(&primals);CHKERRQ(ierr); 411 412 /* Compute edge connectivity */ 413 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 414 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 415 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 416 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 417 ierr = PCBDDCSetLocalAdjacencyGraph(pc,ne,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 418 } 419 /* Analyze interface for edge dofs */ 420 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 421 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 422 423 /* Get coarse edges in the edge space */ 424 pcbddc->mat_graph->twodim = PETSC_FALSE; 425 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 426 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 427 428 /* Mark fine edge dofs with their coarse edge id */ 429 maxsize = 0; 430 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 431 for (i=0;i<nee;i++) { 432 PetscInt size,mark = i+1; 433 434 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 435 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 436 for (j=0;j<size;j++) marks[idxs[j]] = mark; 437 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 438 maxsize = PetscMax(maxsize,size); 439 } 440 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 441 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 442 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 443 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 444 if (print) { 445 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 446 ierr = ISView(primals,NULL);CHKERRQ(ierr); 447 } 448 449 /* Find coarse edge endpoints */ 450 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 451 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 452 for (i=0;i<nee;i++) { 453 PetscInt mark = i+1,size; 454 455 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 456 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 457 if (print) { 458 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 459 ISView(eedges[i],NULL); 460 } 461 for (j=0;j<size;j++) { 462 PetscInt k, ee = idxs[j]; 463 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 464 for (k=ii[ee];k<ii[ee+1];k++) { 465 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 466 if (PetscBTLookup(btv,jj[k])) { 467 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 468 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 469 if (print) PetscPrintf(PETSC_COMM_SELF," candidate? %d\n",(PetscBool)(PetscBTLookup(btvcand,jj[k]))); 470 PetscInt k2; 471 PetscBool corner = PETSC_FALSE; 472 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 473 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: count %d, mark %d (ref mark %d), special %d\n",jjt[k2],pcbddc->mat_graph->count[jjt[k2]],marks[jjt[k2]],mark,pcbddc->mat_graph->special_dof[jjt[k2]]); 474 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || 475 pcbddc->mat_graph->special_dof[jjt[k2]] == PCBDDCGRAPH_DIRICHLET_MARK) { 476 corner = PETSC_TRUE; 477 break; 478 } 479 } 480 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 481 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 482 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 483 } else { 484 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 485 } 486 } 487 } 488 } 489 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 490 } 491 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 492 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 493 494 /* Reset marked primal dofs */ 495 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 496 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 497 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 498 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 499 500 /* Compute extended cols indices */ 501 ierr = MatGetRowIJ(lG ,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 502 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 503 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 504 i *= maxsize; 505 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 506 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 507 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 508 eerr = PETSC_FALSE; 509 for (i=0;i<nee;i++) { 510 PetscInt size; 511 512 cum = 0; 513 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 514 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 515 for (j=0;j<size;j++) { 516 PetscInt k,ee = idxs[j]; 517 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 518 } 519 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 520 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 521 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 522 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 523 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 524 /* it may happen that endpoints are not defined at this point 525 if it is the case, mark this edge for a second pass */ 526 if (cum != size -1) { 527 emarks[i] = 1; 528 if (print) { 529 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 530 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 531 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 532 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 533 } 534 eerr = PETSC_TRUE; 535 } 536 } 537 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 538 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 539 if (done) { 540 PetscInt *newprimals; 541 542 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 543 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 544 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 545 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 546 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 547 for (i=0;i<nee;i++) { 548 if (emarks[i]) { 549 PetscInt size,mark = i+1; 550 551 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 552 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 553 for (j=0;j<size;j++) { 554 /* newprimals[cum++] = idxs[j]; */ 555 PetscInt k,ee = idxs[j]; 556 for (k=ii[ee];k<ii[ee+1];k++) { 557 /* set all candidates located on the edge as corners */ 558 if (PetscBTLookup(btvcand,jj[k])) { 559 PetscInt k2,vv = jj[k]; 560 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 561 /* set all edge dofs connected to candidate as primals */ 562 for (k2=iit[vv];k2<iit[vv+1];k2++) { 563 if (marks[jjt[k2]] == mark) { 564 PetscInt k3,ee2 = jjt[k2]; 565 newprimals[cum++] = ee2; 566 /* finally set the new corners */ 567 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 568 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 569 } 570 } 571 } 572 } 573 } 574 } 575 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 576 } 577 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 578 } 579 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 580 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 581 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 582 ierr = PetscFree(newprimals);CHKERRQ(ierr); 583 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 584 ierr = ISDestroy(&primals);CHKERRQ(ierr); 585 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 586 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 587 588 /* Mark again */ 589 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 590 for (i=0;i<nee;i++) { 591 PetscInt size,mark = i+1; 592 593 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 594 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 595 for (j=0;j<size;j++) marks[idxs[j]] = mark; 596 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 597 } 598 if (print) { 599 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 600 ierr = ISView(primals,NULL);CHKERRQ(ierr); 601 } 602 603 /* Recompute extended cols */ 604 eerr = PETSC_FALSE; 605 for (i=0;i<nee;i++) { 606 PetscInt size; 607 608 cum = 0; 609 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 610 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 611 for (j=0;j<size;j++) { 612 PetscInt k,ee = idxs[j]; 613 for (k=ii[ee];k<ii[ee+1];k++) { 614 if (!PetscBTLookup(btv,jj[k])) { 615 extrow[cum++] = jj[k]; 616 } 617 } 618 } 619 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 620 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 621 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 622 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 623 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 624 if (cum != size -1) { 625 if (print) { 626 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 627 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 628 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 629 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 630 } 631 eerr = PETSC_TRUE; 632 } 633 } 634 } 635 ierr = MatRestoreRowIJ( lG,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 636 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 637 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 638 ierr = PetscFree(emarks);CHKERRQ(ierr); 639 /* an error should not occur at this point */ 640 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 641 642 /* Check the number of endpoints */ 643 /* TODO: add case for circular edge */ 644 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 645 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 646 for (i=0;i<nee;i++) { 647 PetscInt size, found = 0; 648 649 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 650 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 651 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 652 for (j=0;j<size;j++) { 653 PetscInt k,ee = idxs[j]; 654 for (k=ii[ee];k<ii[ee+1];k++) { 655 PetscInt vv = jj[k]; 656 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 657 found++; 658 } 659 } 660 } 661 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 662 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 663 } 664 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 665 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 666 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 667 668 #if defined(PETSC_USE_DEBUG) 669 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 670 not interfere with neighbouring coarse edges */ 671 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 672 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 673 for (i=0;i<nv;i++) { 674 PetscInt emax = 0,eemax = 0; 675 676 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 677 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 678 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 679 for (j=1;j<nee+1;j++) { 680 if (emax < emarks[j]) { 681 emax = emarks[j]; 682 eemax = j; 683 } 684 } 685 /* not relevant for edges */ 686 if (!eemax) continue; 687 688 for (j=ii[i];j<ii[i+1];j++) { 689 if (marks[jj[j]] && marks[jj[j]] != eemax) { 690 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dod %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 691 } 692 } 693 } 694 ierr = PetscFree(emarks);CHKERRQ(ierr); 695 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 696 #endif 697 698 /* Compute extended rows indices for edge blocks of the change of basis */ 699 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 700 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 701 extmem *= maxsize; 702 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 703 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 704 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 705 for (i=0;i<nv;i++) { 706 PetscInt mark = 0,size,start; 707 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 708 for (j=ii[i];j<ii[i+1];j++) 709 if (marks[jj[j]] && !mark) 710 mark = marks[jj[j]]; 711 712 /* not relevant */ 713 if (!mark) continue; 714 715 /* import extended row */ 716 mark--; 717 start = mark*extmem+extrowcum[mark]; 718 size = ii[i+1]-ii[i]; 719 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 720 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 721 extrowcum[mark] += size; 722 } 723 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 724 cum = 0; 725 for (i=0;i<nee;i++) { 726 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 727 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 728 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 729 cum = PetscMax(cum,size); 730 } 731 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 732 ierr = PetscFree(marks);CHKERRQ(ierr); 733 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 734 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 735 736 /* Workspace for lapack inner calls and VecSetValues */ 737 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 738 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 739 for (i=0;i<maxsize;i++) vals[i] = 1.; 740 741 /* Create vectors for quadrature rules */ 742 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 743 for (i=0;i<nquads;i++) { 744 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 745 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 746 } 747 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 748 749 /* Create change of basis matrix (preallocation can be improved) */ 750 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 751 ierr = MatSetSizes(T,Le,Le,Ne,Ne);CHKERRQ(ierr); 752 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 753 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 754 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 755 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 756 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 757 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 758 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 759 760 /* Defaults to identity */ 761 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 762 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 763 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 764 765 for (i=0;i<nee;i++) { 766 Mat Gins = NULL, GKins = NULL; 767 768 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 769 if (Gins && GKins) { 770 PetscScalar *data; 771 const PetscInt *rows,*cols; 772 PetscInt nrh,nch,nrc,ncc; 773 774 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 775 /* H1 */ 776 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 777 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 778 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 779 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 780 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 781 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 782 /* complement */ 783 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 784 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 785 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 786 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 787 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 788 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 789 /* Gins kernel quadratures */ 790 for (j=0;j<ncc;j++) { 791 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 792 } 793 /* H1 average */ 794 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 795 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 796 } 797 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 798 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 799 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 800 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 801 } 802 803 /* Start assembling */ 804 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 805 for (i=0;i<nquads;i++) { 806 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 807 } 808 809 /* Free */ 810 ierr = PetscFree(extrow);CHKERRQ(ierr); 811 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 812 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 813 ierr = PetscFree(vals);CHKERRQ(ierr); 814 ierr = PetscFree(extrows);CHKERRQ(ierr); 815 ierr = PetscFree(extcols);CHKERRQ(ierr); 816 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 817 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 818 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 819 ierr = MatDestroy(&lG);CHKERRQ(ierr); 820 ierr = MatDestroy(&conn);CHKERRQ(ierr); 821 822 /* Complete assembling */ 823 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 824 for (i=0;i<nquads;i++) { 825 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 826 } 827 for (i=0;i<nquads;i++) { 828 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(quads);CHKERRQ(ierr); 831 832 /* tell PCBDDC the topography has been analyzed */ 833 pcbddc->recompute_topography = PETSC_FALSE; 834 835 /* set change of basis */ 836 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 837 ierr = MatDestroy(&T);CHKERRQ(ierr); 838 839 /* set quadratures */ 840 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 841 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 842 843 PetscFunctionReturn(0); 844 } 845 846 /* the near-null space of BDDC carries information on quadrature weights, 847 and these can be collinear -> so cheat with MatNullSpaceCreate 848 and create a suitable set of basis vectors first */ 849 #undef __FUNCT__ 850 #define __FUNCT__ "PCBDDCNullSpaceCreate" 851 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 852 { 853 PetscErrorCode ierr; 854 PetscInt i; 855 856 PetscFunctionBegin; 857 for (i=0;i<nvecs;i++) { 858 PetscInt first,last; 859 860 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 861 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 862 if (i>=first && i < last) { 863 PetscScalar *data; 864 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 865 if (!has_const) { 866 data[i-first] = 1.; 867 } else { 868 data[2*i-first] = 1./PetscSqrtReal(2.); 869 data[2*i-first+1] = -1./PetscSqrtReal(2.); 870 } 871 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 872 } 873 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 874 } 875 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 876 for (i=0;i<nvecs;i++) { /* reset vectors */ 877 PetscInt first,last; 878 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 879 if (i>=first && i < last) { 880 PetscScalar *data; 881 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 882 if (!has_const) { 883 data[i-first] = 0.; 884 } else { 885 data[2*i-first] = 0.; 886 data[2*i-first+1] = 0.; 887 } 888 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 889 } 890 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 891 } 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 897 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 898 { 899 Mat loc_divudotp; 900 Vec p,v,vins,quad_vec,*quad_vecs; 901 ISLocalToGlobalMapping map; 902 IS *faces,*edges; 903 PetscScalar *vals; 904 const PetscScalar *array; 905 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 906 PetscMPIInt rank; 907 PetscErrorCode ierr; 908 909 PetscFunctionBegin; 910 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 911 if (graph->twodim) { 912 lmaxneighs = 2; 913 } else { 914 lmaxneighs = 1; 915 for (i=0;i<ne;i++) { 916 const PetscInt *idxs; 917 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 918 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 919 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 920 } 921 lmaxneighs++; /* graph count does not include self */ 922 } 923 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 924 maxsize = 0; 925 for (i=0;i<ne;i++) { 926 PetscInt nn; 927 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 928 maxsize = PetscMax(maxsize,nn); 929 } 930 for (i=0;i<nf;i++) { 931 PetscInt nn; 932 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 933 maxsize = PetscMax(maxsize,nn); 934 } 935 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 936 /* create vectors to hold quadrature weights */ 937 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 938 if (!transpose) { 939 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 940 } else { 941 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 942 } 943 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 944 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 945 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 946 for (i=0;i<maxneighs;i++) { 947 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 948 } 949 950 /* compute local quad vec */ 951 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 952 if (!transpose) { 953 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 954 } else { 955 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 956 } 957 ierr = VecSet(p,1.);CHKERRQ(ierr); 958 if (!transpose) { 959 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 960 } else { 961 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 962 } 963 if (vl2l) { 964 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 965 } else { 966 vins = v; 967 } 968 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 969 ierr = VecDestroy(&p);CHKERRQ(ierr); 970 971 /* insert in global quadrature vecs */ 972 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 973 for (i=0;i<nf;i++) { 974 const PetscInt *idxs; 975 PetscInt idx,nn,j; 976 977 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 978 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 979 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 980 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 981 idx = -(idx+1); 982 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 983 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 984 } 985 for (i=0;i<ne;i++) { 986 const PetscInt *idxs; 987 PetscInt idx,nn,j; 988 989 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 991 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 992 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 993 idx = -(idx+1); 994 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 995 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 996 } 997 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 998 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 999 if (vl2l) { 1000 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1001 } 1002 ierr = VecDestroy(&v);CHKERRQ(ierr); 1003 ierr = PetscFree(vals);CHKERRQ(ierr); 1004 1005 /* assemble near null space */ 1006 for (i=0;i<maxneighs;i++) { 1007 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1008 } 1009 for (i=0;i<maxneighs;i++) { 1010 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1011 } 1012 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1019 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1020 { 1021 PetscErrorCode ierr; 1022 Vec local,global; 1023 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1024 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1025 1026 PetscFunctionBegin; 1027 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1028 /* need to convert from global to local topology information and remove references to information in global ordering */ 1029 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1030 if (pcbddc->user_provided_isfordofs) { 1031 if (pcbddc->n_ISForDofs) { 1032 PetscInt i; 1033 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1034 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1035 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1036 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1037 } 1038 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1039 pcbddc->n_ISForDofs = 0; 1040 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1041 } 1042 } else { 1043 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1044 PetscInt i, n = matis->A->rmap->n; 1045 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1046 if (i > 1) { 1047 pcbddc->n_ISForDofsLocal = i; 1048 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1049 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1050 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1051 } 1052 } 1053 } 1054 } 1055 1056 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1057 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1058 } 1059 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1060 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1061 } 1062 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1063 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1064 } 1065 ierr = VecDestroy(&global);CHKERRQ(ierr); 1066 ierr = VecDestroy(&local);CHKERRQ(ierr); 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1072 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1073 { 1074 PC_IS *pcis = (PC_IS*)(pc->data); 1075 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1076 PetscErrorCode ierr; 1077 1078 PetscFunctionBegin; 1079 if (!pcbddc->benign_have_null) { 1080 PetscFunctionReturn(0); 1081 } 1082 if (pcbddc->ChangeOfBasisMatrix) { 1083 Vec swap; 1084 1085 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1086 swap = pcbddc->work_change; 1087 pcbddc->work_change = r; 1088 r = swap; 1089 } 1090 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1093 ierr = VecSet(z,0.);CHKERRQ(ierr); 1094 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1095 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1096 if (pcbddc->ChangeOfBasisMatrix) { 1097 Vec swap; 1098 1099 swap = r; 1100 r = pcbddc->work_change; 1101 pcbddc->work_change = swap; 1102 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1103 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1104 } 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1110 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1111 { 1112 PCBDDCBenignMatMult_ctx ctx; 1113 PetscErrorCode ierr; 1114 PetscBool apply_right,apply_left,reset_x; 1115 1116 PetscFunctionBegin; 1117 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1118 if (transpose) { 1119 apply_right = ctx->apply_left; 1120 apply_left = ctx->apply_right; 1121 } else { 1122 apply_right = ctx->apply_right; 1123 apply_left = ctx->apply_left; 1124 } 1125 reset_x = PETSC_FALSE; 1126 if (apply_right) { 1127 const PetscScalar *ax; 1128 PetscInt nl,i; 1129 1130 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1131 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1132 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1133 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1134 for (i=0;i<ctx->benign_n;i++) { 1135 PetscScalar sum,val; 1136 const PetscInt *idxs; 1137 PetscInt nz,j; 1138 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1139 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1140 sum = 0.; 1141 if (ctx->apply_p0) { 1142 val = ctx->work[idxs[nz-1]]; 1143 for (j=0;j<nz-1;j++) { 1144 sum += ctx->work[idxs[j]]; 1145 ctx->work[idxs[j]] += val; 1146 } 1147 } else { 1148 for (j=0;j<nz-1;j++) { 1149 sum += ctx->work[idxs[j]]; 1150 } 1151 } 1152 ctx->work[idxs[nz-1]] -= sum; 1153 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1154 } 1155 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1156 reset_x = PETSC_TRUE; 1157 } 1158 if (transpose) { 1159 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1160 } else { 1161 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1162 } 1163 if (reset_x) { 1164 ierr = VecResetArray(x);CHKERRQ(ierr); 1165 } 1166 if (apply_left) { 1167 PetscScalar *ay; 1168 PetscInt i; 1169 1170 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1171 for (i=0;i<ctx->benign_n;i++) { 1172 PetscScalar sum,val; 1173 const PetscInt *idxs; 1174 PetscInt nz,j; 1175 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1176 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1177 val = -ay[idxs[nz-1]]; 1178 if (ctx->apply_p0) { 1179 sum = 0.; 1180 for (j=0;j<nz-1;j++) { 1181 sum += ay[idxs[j]]; 1182 ay[idxs[j]] += val; 1183 } 1184 ay[idxs[nz-1]] += sum; 1185 } else { 1186 for (j=0;j<nz-1;j++) { 1187 ay[idxs[j]] += val; 1188 } 1189 ay[idxs[nz-1]] = 0.; 1190 } 1191 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1192 } 1193 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1194 } 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1200 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1201 { 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1211 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1212 { 1213 PetscErrorCode ierr; 1214 1215 PetscFunctionBegin; 1216 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "PCBDDCBenignShellMat" 1222 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1223 { 1224 PC_IS *pcis = (PC_IS*)pc->data; 1225 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1226 PCBDDCBenignMatMult_ctx ctx; 1227 PetscErrorCode ierr; 1228 1229 PetscFunctionBegin; 1230 if (!restore) { 1231 Mat A_IB,A_BI; 1232 PetscScalar *work; 1233 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1234 1235 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1236 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1237 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1238 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1239 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1240 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1241 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1242 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1243 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1244 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1245 ctx->apply_left = PETSC_TRUE; 1246 ctx->apply_right = PETSC_FALSE; 1247 ctx->apply_p0 = PETSC_FALSE; 1248 ctx->benign_n = pcbddc->benign_n; 1249 if (reuse) { 1250 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1251 ctx->free = PETSC_FALSE; 1252 } else { /* TODO: could be optimized for successive solves */ 1253 ISLocalToGlobalMapping N_to_D; 1254 PetscInt i; 1255 1256 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1257 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1258 for (i=0;i<pcbddc->benign_n;i++) { 1259 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1260 } 1261 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1262 ctx->free = PETSC_TRUE; 1263 } 1264 ctx->A = pcis->A_IB; 1265 ctx->work = work; 1266 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1267 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1268 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1269 pcis->A_IB = A_IB; 1270 1271 /* A_BI as A_IB^T */ 1272 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1273 pcbddc->benign_original_mat = pcis->A_BI; 1274 pcis->A_BI = A_BI; 1275 } else { 1276 if (!pcbddc->benign_original_mat) { 1277 PetscFunctionReturn(0); 1278 } 1279 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1280 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1281 pcis->A_IB = ctx->A; 1282 ctx->A = NULL; 1283 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1284 pcis->A_BI = pcbddc->benign_original_mat; 1285 pcbddc->benign_original_mat = NULL; 1286 if (ctx->free) { 1287 PetscInt i; 1288 for (i=0;i<ctx->benign_n;i++) { 1289 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1290 } 1291 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1292 } 1293 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1294 ierr = PetscFree(ctx);CHKERRQ(ierr); 1295 } 1296 PetscFunctionReturn(0); 1297 } 1298 1299 /* used just in bddc debug mode */ 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "PCBDDCBenignProject" 1302 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1303 { 1304 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1305 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1306 Mat An; 1307 PetscErrorCode ierr; 1308 1309 PetscFunctionBegin; 1310 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1311 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1312 if (is1) { 1313 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1314 ierr = MatDestroy(&An);CHKERRQ(ierr); 1315 } else { 1316 *B = An; 1317 } 1318 PetscFunctionReturn(0); 1319 } 1320 1321 /* TODO: add reuse flag */ 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatSeqAIJCompress" 1324 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1325 { 1326 Mat Bt; 1327 PetscScalar *a,*bdata; 1328 const PetscInt *ii,*ij; 1329 PetscInt m,n,i,nnz,*bii,*bij; 1330 PetscBool flg_row; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1335 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1336 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1337 nnz = n; 1338 for (i=0;i<ii[n];i++) { 1339 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1340 } 1341 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1342 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1343 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1344 nnz = 0; 1345 bii[0] = 0; 1346 for (i=0;i<n;i++) { 1347 PetscInt j; 1348 for (j=ii[i];j<ii[i+1];j++) { 1349 PetscScalar entry = a[j]; 1350 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1351 bij[nnz] = ij[j]; 1352 bdata[nnz] = entry; 1353 nnz++; 1354 } 1355 } 1356 bii[i+1] = nnz; 1357 } 1358 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1359 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1360 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1361 { 1362 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1363 b->free_a = PETSC_TRUE; 1364 b->free_ij = PETSC_TRUE; 1365 } 1366 *B = Bt; 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatDetectDisconnectedComponents" 1372 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1373 { 1374 Mat B; 1375 IS is_dummy,*cc_n; 1376 ISLocalToGlobalMapping l2gmap_dummy; 1377 PCBDDCGraph graph; 1378 PetscInt i,n; 1379 PetscInt *xadj,*adjncy; 1380 PetscInt *xadj_filtered,*adjncy_filtered; 1381 PetscBool flg_row,isseqaij; 1382 PetscErrorCode ierr; 1383 1384 PetscFunctionBegin; 1385 if (!A->rmap->N || !A->cmap->N) { 1386 *ncc = 0; 1387 *cc = NULL; 1388 PetscFunctionReturn(0); 1389 } 1390 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1391 if (!isseqaij && filter) { 1392 PetscBool isseqdense; 1393 1394 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1395 if (!isseqdense) { 1396 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1397 } else { /* TODO: rectangular case and LDA */ 1398 PetscScalar *array; 1399 PetscReal chop=1.e-6; 1400 1401 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1402 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1403 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1404 for (i=0;i<n;i++) { 1405 PetscInt j; 1406 for (j=i+1;j<n;j++) { 1407 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1408 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1409 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1410 } 1411 } 1412 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1413 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1414 } 1415 } else { 1416 B = A; 1417 } 1418 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1419 1420 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1421 if (filter) { 1422 PetscScalar *data; 1423 PetscInt j,cum; 1424 1425 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1426 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1427 cum = 0; 1428 for (i=0;i<n;i++) { 1429 PetscInt t; 1430 1431 for (j=xadj[i];j<xadj[i+1];j++) { 1432 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1433 continue; 1434 } 1435 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1436 } 1437 t = xadj_filtered[i]; 1438 xadj_filtered[i] = cum; 1439 cum += t; 1440 } 1441 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1442 } else { 1443 xadj_filtered = NULL; 1444 adjncy_filtered = NULL; 1445 } 1446 1447 /* compute local connected components using PCBDDCGraph */ 1448 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1449 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1450 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1451 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1452 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1453 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1454 if (xadj_filtered) { 1455 graph->xadj = xadj_filtered; 1456 graph->adjncy = adjncy_filtered; 1457 } else { 1458 graph->xadj = xadj; 1459 graph->adjncy = adjncy; 1460 } 1461 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1462 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1463 /* partial clean up */ 1464 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1465 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1466 if (A != B) { 1467 ierr = MatDestroy(&B);CHKERRQ(ierr); 1468 } 1469 1470 /* get back data */ 1471 if (ncc) *ncc = graph->ncc; 1472 if (cc) { 1473 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1474 for (i=0;i<graph->ncc;i++) { 1475 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1476 } 1477 *cc = cc_n; 1478 } 1479 /* clean up graph */ 1480 graph->xadj = 0; 1481 graph->adjncy = 0; 1482 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "PCBDDCBenignCheck" 1488 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1489 { 1490 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1491 PC_IS* pcis = (PC_IS*)(pc->data); 1492 IS dirIS = NULL; 1493 PetscInt i; 1494 PetscErrorCode ierr; 1495 1496 PetscFunctionBegin; 1497 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1498 if (zerodiag) { 1499 Mat A; 1500 Vec vec3_N; 1501 PetscScalar *vals; 1502 const PetscInt *idxs; 1503 PetscInt nz,*count; 1504 1505 /* p0 */ 1506 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1507 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1508 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1509 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1510 for (i=0;i<nz;i++) vals[i] = 1.; 1511 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1512 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1513 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1514 /* v_I */ 1515 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1516 for (i=0;i<nz;i++) vals[i] = 0.; 1517 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1518 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1519 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1520 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1521 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1522 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1523 if (dirIS) { 1524 PetscInt n; 1525 1526 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1527 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1528 for (i=0;i<n;i++) vals[i] = 0.; 1529 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1530 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1531 } 1532 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1533 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1534 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1535 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1536 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1537 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1538 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1539 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1540 ierr = PetscFree(vals);CHKERRQ(ierr); 1541 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1542 1543 /* there should not be any pressure dofs lying on the interface */ 1544 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1545 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1546 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1547 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1548 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1549 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1550 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1551 ierr = PetscFree(count);CHKERRQ(ierr); 1552 } 1553 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1554 1555 /* check PCBDDCBenignGetOrSetP0 */ 1556 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1557 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1558 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1559 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1560 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1561 for (i=0;i<pcbddc->benign_n;i++) { 1562 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1563 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1564 } 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1570 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1571 { 1572 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1573 IS pressures,zerodiag,*zerodiag_subs; 1574 PetscInt nz,n; 1575 PetscInt *interior_dofs,n_interior_dofs; 1576 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1577 PetscErrorCode ierr; 1578 1579 PetscFunctionBegin; 1580 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1581 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1582 for (n=0;n<pcbddc->benign_n;n++) { 1583 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1584 } 1585 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1586 pcbddc->benign_n = 0; 1587 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1588 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1589 Checks if all the pressure dofs in each subdomain have a zero diagonal 1590 If not, a change of basis on pressures is not needed 1591 since the local Schur complements are already SPD 1592 */ 1593 has_null_pressures = PETSC_TRUE; 1594 have_null = PETSC_TRUE; 1595 if (pcbddc->n_ISForDofsLocal) { 1596 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1597 1598 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1599 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1600 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1601 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1602 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1603 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1604 if (!sorted) { 1605 ierr = ISSort(pressures);CHKERRQ(ierr); 1606 } 1607 } else { 1608 pressures = NULL; 1609 } 1610 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1611 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1612 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1613 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1614 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1615 if (!sorted) { 1616 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1617 } 1618 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1619 if (!nz) { 1620 if (n) have_null = PETSC_FALSE; 1621 has_null_pressures = PETSC_FALSE; 1622 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1623 } 1624 recompute_zerodiag = PETSC_FALSE; 1625 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1626 zerodiag_subs = NULL; 1627 pcbddc->benign_n = 0; 1628 n_interior_dofs = 0; 1629 interior_dofs = NULL; 1630 if (pcbddc->current_level) { /* need to compute interior nodes */ 1631 PetscInt n,i,j; 1632 PetscInt n_neigh,*neigh,*n_shared,**shared; 1633 PetscInt *iwork; 1634 1635 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1636 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1637 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1638 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1639 for (i=1;i<n_neigh;i++) 1640 for (j=0;j<n_shared[i];j++) 1641 iwork[shared[i][j]] += 1; 1642 for (i=0;i<n;i++) 1643 if (!iwork[i]) 1644 interior_dofs[n_interior_dofs++] = i; 1645 ierr = PetscFree(iwork);CHKERRQ(ierr); 1646 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1647 } 1648 if (has_null_pressures) { 1649 IS *subs; 1650 PetscInt nsubs,i,j,nl; 1651 const PetscInt *idxs; 1652 PetscScalar *array; 1653 Vec *work; 1654 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1655 1656 subs = pcbddc->local_subs; 1657 nsubs = pcbddc->n_local_subs; 1658 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1659 if (pcbddc->current_level) { 1660 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1661 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1662 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1663 /* work[0] = 1_p */ 1664 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1665 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1666 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1667 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1668 /* work[0] = 1_v */ 1669 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1670 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1671 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1672 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1673 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1674 } 1675 if (nsubs > 1) { 1676 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1677 for (i=0;i<nsubs;i++) { 1678 ISLocalToGlobalMapping l2g; 1679 IS t_zerodiag_subs; 1680 PetscInt nl; 1681 1682 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1683 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1684 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1685 if (nl) { 1686 PetscBool valid = PETSC_TRUE; 1687 1688 if (pcbddc->current_level) { 1689 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1690 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1691 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1692 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1693 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1694 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1695 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1696 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1697 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1698 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1699 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1700 for (j=0;j<n_interior_dofs;j++) { 1701 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1702 valid = PETSC_FALSE; 1703 break; 1704 } 1705 } 1706 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1707 } 1708 if (valid && pcbddc->NeumannBoundariesLocal) { 1709 IS t_bc; 1710 PetscInt nzb; 1711 1712 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1713 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1714 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1715 if (nzb) valid = PETSC_FALSE; 1716 } 1717 if (valid && pressures) { 1718 IS t_pressure_subs; 1719 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1720 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1721 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1722 } 1723 if (valid) { 1724 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1725 pcbddc->benign_n++; 1726 } else { 1727 recompute_zerodiag = PETSC_TRUE; 1728 } 1729 } 1730 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1731 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1732 } 1733 } else { /* there's just one subdomain (or zero if they have not been detected */ 1734 PetscBool valid = PETSC_TRUE; 1735 1736 if (pcbddc->NeumannBoundariesLocal) { 1737 PetscInt nzb; 1738 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1739 if (nzb) valid = PETSC_FALSE; 1740 } 1741 if (valid && pressures) { 1742 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1743 } 1744 if (valid && pcbddc->current_level) { 1745 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1746 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1747 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1748 for (j=0;j<n_interior_dofs;j++) { 1749 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1750 valid = PETSC_FALSE; 1751 break; 1752 } 1753 } 1754 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1755 } 1756 if (valid) { 1757 pcbddc->benign_n = 1; 1758 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1759 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1760 zerodiag_subs[0] = zerodiag; 1761 } 1762 } 1763 if (pcbddc->current_level) { 1764 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1765 } 1766 } 1767 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1768 1769 if (!pcbddc->benign_n) { 1770 PetscInt n; 1771 1772 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1773 recompute_zerodiag = PETSC_FALSE; 1774 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1775 if (n) { 1776 has_null_pressures = PETSC_FALSE; 1777 have_null = PETSC_FALSE; 1778 } 1779 } 1780 1781 /* final check for null pressures */ 1782 if (zerodiag && pressures) { 1783 PetscInt nz,np; 1784 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1785 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 1786 if (nz != np) have_null = PETSC_FALSE; 1787 } 1788 1789 if (recompute_zerodiag) { 1790 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1791 if (pcbddc->benign_n == 1) { 1792 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 1793 zerodiag = zerodiag_subs[0]; 1794 } else { 1795 PetscInt i,nzn,*new_idxs; 1796 1797 nzn = 0; 1798 for (i=0;i<pcbddc->benign_n;i++) { 1799 PetscInt ns; 1800 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1801 nzn += ns; 1802 } 1803 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 1804 nzn = 0; 1805 for (i=0;i<pcbddc->benign_n;i++) { 1806 PetscInt ns,*idxs; 1807 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1808 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1809 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 1810 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1811 nzn += ns; 1812 } 1813 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 1814 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 1815 } 1816 have_null = PETSC_FALSE; 1817 } 1818 1819 /* Prepare matrix to compute no-net-flux */ 1820 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 1821 Mat A,loc_divudotp; 1822 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 1823 IS row,col,isused = NULL; 1824 PetscInt M,N,n,st,n_isused; 1825 1826 if (pressures) { 1827 isused = pressures; 1828 } else { 1829 isused = zerodiag; 1830 } 1831 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 1832 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1833 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 1834 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 1835 n_isused = 0; 1836 if (isused) { 1837 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 1838 } 1839 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1840 st = st-n_isused; 1841 if (n) { 1842 const PetscInt *gidxs; 1843 1844 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 1845 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1846 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 1847 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1848 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1849 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1850 } else { 1851 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 1852 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1853 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1854 } 1855 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 1856 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 1857 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 1858 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 1859 ierr = ISDestroy(&row);CHKERRQ(ierr); 1860 ierr = ISDestroy(&col);CHKERRQ(ierr); 1861 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 1862 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1863 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1864 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1865 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1866 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1867 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1868 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1869 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1870 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1871 } 1872 1873 /* change of basis and p0 dofs */ 1874 if (has_null_pressures) { 1875 IS zerodiagc; 1876 const PetscInt *idxs,*idxsc; 1877 PetscInt i,s,*nnz; 1878 1879 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1880 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1881 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1882 /* local change of basis for pressures */ 1883 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1884 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1885 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1886 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1887 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1888 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1889 for (i=0;i<pcbddc->benign_n;i++) { 1890 PetscInt nzs,j; 1891 1892 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1893 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1894 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1895 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1896 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1897 } 1898 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1899 ierr = PetscFree(nnz);CHKERRQ(ierr); 1900 /* set identity on velocities */ 1901 for (i=0;i<n-nz;i++) { 1902 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1903 } 1904 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1905 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1906 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1907 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1908 /* set change on pressures */ 1909 for (s=0;s<pcbddc->benign_n;s++) { 1910 PetscScalar *array; 1911 PetscInt nzs; 1912 1913 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1914 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1915 for (i=0;i<nzs-1;i++) { 1916 PetscScalar vals[2]; 1917 PetscInt cols[2]; 1918 1919 cols[0] = idxs[i]; 1920 cols[1] = idxs[nzs-1]; 1921 vals[0] = 1.; 1922 vals[1] = 1.; 1923 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1924 } 1925 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1926 for (i=0;i<nzs-1;i++) array[i] = -1.; 1927 array[nzs-1] = 1.; 1928 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1929 /* store local idxs for p0 */ 1930 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1931 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1932 ierr = PetscFree(array);CHKERRQ(ierr); 1933 } 1934 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1935 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1936 /* project if needed */ 1937 if (pcbddc->benign_change_explicit) { 1938 Mat M; 1939 1940 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1941 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1942 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1943 ierr = MatDestroy(&M);CHKERRQ(ierr); 1944 } 1945 /* store global idxs for p0 */ 1946 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1947 } 1948 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1949 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1950 1951 /* determines if the coarse solver will be singular or not */ 1952 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1953 /* determines if the problem has subdomains with 0 pressure block */ 1954 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1955 *zerodiaglocal = zerodiag; 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #undef __FUNCT__ 1960 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1961 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1962 { 1963 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1964 PetscScalar *array; 1965 PetscErrorCode ierr; 1966 1967 PetscFunctionBegin; 1968 if (!pcbddc->benign_sf) { 1969 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1970 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1971 } 1972 if (get) { 1973 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1974 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1975 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1976 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1977 } else { 1978 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1979 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1980 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1981 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1982 } 1983 PetscFunctionReturn(0); 1984 } 1985 1986 #undef __FUNCT__ 1987 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1988 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1989 { 1990 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1991 PetscErrorCode ierr; 1992 1993 PetscFunctionBegin; 1994 /* TODO: add error checking 1995 - avoid nested pop (or push) calls. 1996 - cannot push before pop. 1997 - cannot call this if pcbddc->local_mat is NULL 1998 */ 1999 if (!pcbddc->benign_n) { 2000 PetscFunctionReturn(0); 2001 } 2002 if (pop) { 2003 if (pcbddc->benign_change_explicit) { 2004 IS is_p0; 2005 MatReuse reuse; 2006 2007 /* extract B_0 */ 2008 reuse = MAT_INITIAL_MATRIX; 2009 if (pcbddc->benign_B0) { 2010 reuse = MAT_REUSE_MATRIX; 2011 } 2012 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2013 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2014 /* remove rows and cols from local problem */ 2015 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2016 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2017 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2018 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2019 } else { 2020 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2021 PetscScalar *vals; 2022 PetscInt i,n,*idxs_ins; 2023 2024 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2025 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2026 if (!pcbddc->benign_B0) { 2027 PetscInt *nnz; 2028 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2029 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2030 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2031 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2032 for (i=0;i<pcbddc->benign_n;i++) { 2033 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2034 nnz[i] = n - nnz[i]; 2035 } 2036 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2037 ierr = PetscFree(nnz);CHKERRQ(ierr); 2038 } 2039 2040 for (i=0;i<pcbddc->benign_n;i++) { 2041 PetscScalar *array; 2042 PetscInt *idxs,j,nz,cum; 2043 2044 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2045 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2046 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2047 for (j=0;j<nz;j++) vals[j] = 1.; 2048 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2049 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2050 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2051 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2052 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2053 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2054 cum = 0; 2055 for (j=0;j<n;j++) { 2056 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2057 vals[cum] = array[j]; 2058 idxs_ins[cum] = j; 2059 cum++; 2060 } 2061 } 2062 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2063 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2064 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2065 } 2066 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2069 } 2070 } else { /* push */ 2071 if (pcbddc->benign_change_explicit) { 2072 PetscInt i; 2073 2074 for (i=0;i<pcbddc->benign_n;i++) { 2075 PetscScalar *B0_vals; 2076 PetscInt *B0_cols,B0_ncol; 2077 2078 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2079 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2080 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2081 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2082 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2083 } 2084 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2085 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2086 } else { 2087 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2088 } 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 #undef __FUNCT__ 2094 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2095 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2096 { 2097 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2098 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2099 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2100 PetscBLASInt *B_iwork,*B_ifail; 2101 PetscScalar *work,lwork; 2102 PetscScalar *St,*S,*eigv; 2103 PetscScalar *Sarray,*Starray; 2104 PetscReal *eigs,thresh; 2105 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2106 PetscBool allocated_S_St; 2107 #if defined(PETSC_USE_COMPLEX) 2108 PetscReal *rwork; 2109 #endif 2110 PetscErrorCode ierr; 2111 2112 PetscFunctionBegin; 2113 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2114 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2115 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2116 2117 if (pcbddc->dbg_flag) { 2118 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2121 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2122 } 2123 2124 if (pcbddc->dbg_flag) { 2125 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2126 } 2127 2128 /* max size of subsets */ 2129 mss = 0; 2130 for (i=0;i<sub_schurs->n_subs;i++) { 2131 PetscInt subset_size; 2132 2133 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2134 mss = PetscMax(mss,subset_size); 2135 } 2136 2137 /* min/max and threshold */ 2138 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2139 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2140 nmax = PetscMax(nmin,nmax); 2141 allocated_S_St = PETSC_FALSE; 2142 if (nmin) { 2143 allocated_S_St = PETSC_TRUE; 2144 } 2145 2146 /* allocate lapack workspace */ 2147 cum = cum2 = 0; 2148 maxneigs = 0; 2149 for (i=0;i<sub_schurs->n_subs;i++) { 2150 PetscInt n,subset_size; 2151 2152 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2153 n = PetscMin(subset_size,nmax); 2154 cum += subset_size; 2155 cum2 += subset_size*n; 2156 maxneigs = PetscMax(maxneigs,n); 2157 } 2158 if (mss) { 2159 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2160 PetscBLASInt B_itype = 1; 2161 PetscBLASInt B_N = mss; 2162 PetscReal zero = 0.0; 2163 PetscReal eps = 0.0; /* dlamch? */ 2164 2165 B_lwork = -1; 2166 S = NULL; 2167 St = NULL; 2168 eigs = NULL; 2169 eigv = NULL; 2170 B_iwork = NULL; 2171 B_ifail = NULL; 2172 #if defined(PETSC_USE_COMPLEX) 2173 rwork = NULL; 2174 #endif 2175 thresh = 1.0; 2176 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2177 #if defined(PETSC_USE_COMPLEX) 2178 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2179 #else 2180 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2181 #endif 2182 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2183 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2184 } else { 2185 /* TODO */ 2186 } 2187 } else { 2188 lwork = 0; 2189 } 2190 2191 nv = 0; 2192 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2193 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2194 } 2195 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2196 if (allocated_S_St) { 2197 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2198 } 2199 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2200 #if defined(PETSC_USE_COMPLEX) 2201 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2202 #endif 2203 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2204 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2205 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2206 nv+cum,&pcbddc->adaptive_constraints_idxs, 2207 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2208 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2209 2210 maxneigs = 0; 2211 cum = cumarray = 0; 2212 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2213 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2214 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2215 const PetscInt *idxs; 2216 2217 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2218 for (cum=0;cum<nv;cum++) { 2219 pcbddc->adaptive_constraints_n[cum] = 1; 2220 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2221 pcbddc->adaptive_constraints_data[cum] = 1.0; 2222 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2223 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2224 } 2225 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2226 } 2227 2228 if (mss) { /* multilevel */ 2229 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2230 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2231 } 2232 2233 thresh = pcbddc->adaptive_threshold; 2234 for (i=0;i<sub_schurs->n_subs;i++) { 2235 const PetscInt *idxs; 2236 PetscReal upper,lower; 2237 PetscInt j,subset_size,eigs_start = 0; 2238 PetscBLASInt B_N; 2239 PetscBool same_data = PETSC_FALSE; 2240 2241 if (pcbddc->use_deluxe_scaling) { 2242 upper = PETSC_MAX_REAL; 2243 lower = thresh; 2244 } else { 2245 upper = 1./thresh; 2246 lower = 0.; 2247 } 2248 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2249 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2250 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2251 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2252 if (sub_schurs->is_hermitian) { 2253 PetscInt j,k; 2254 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2255 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2256 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2257 } 2258 for (j=0;j<subset_size;j++) { 2259 for (k=j;k<subset_size;k++) { 2260 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2261 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2262 } 2263 } 2264 } else { 2265 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2266 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2267 } 2268 } else { 2269 S = Sarray + cumarray; 2270 St = Starray + cumarray; 2271 } 2272 /* see if we can save some work */ 2273 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2274 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2275 } 2276 2277 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2278 B_neigs = 0; 2279 } else { 2280 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2281 PetscBLASInt B_itype = 1; 2282 PetscBLASInt B_IL, B_IU; 2283 PetscReal eps = -1.0; /* dlamch? */ 2284 PetscInt nmin_s; 2285 PetscBool compute_range = PETSC_FALSE; 2286 2287 if (pcbddc->dbg_flag) { 2288 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2289 } 2290 2291 compute_range = PETSC_FALSE; 2292 if (thresh > 1.+PETSC_SMALL && !same_data) { 2293 compute_range = PETSC_TRUE; 2294 } 2295 2296 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2297 if (compute_range) { 2298 2299 /* ask for eigenvalues larger than thresh */ 2300 #if defined(PETSC_USE_COMPLEX) 2301 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2302 #else 2303 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2304 #endif 2305 } else if (!same_data) { 2306 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2307 B_IL = 1; 2308 #if defined(PETSC_USE_COMPLEX) 2309 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2310 #else 2311 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2312 #endif 2313 } else { /* same_data is true, so get the adaptive function requested by the user */ 2314 PetscInt k; 2315 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2316 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2317 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2318 nmin = nmax; 2319 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2320 for (k=0;k<nmax;k++) { 2321 eigs[k] = 1./PETSC_SMALL; 2322 eigv[k*(subset_size+1)] = 1.0; 2323 } 2324 } 2325 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2326 if (B_ierr) { 2327 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2328 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2329 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2330 } 2331 2332 if (B_neigs > nmax) { 2333 if (pcbddc->dbg_flag) { 2334 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2335 } 2336 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2337 B_neigs = nmax; 2338 } 2339 2340 nmin_s = PetscMin(nmin,B_N); 2341 if (B_neigs < nmin_s) { 2342 PetscBLASInt B_neigs2; 2343 2344 if (pcbddc->use_deluxe_scaling) { 2345 B_IL = B_N - nmin_s + 1; 2346 B_IU = B_N - B_neigs; 2347 } else { 2348 B_IL = B_neigs + 1; 2349 B_IU = nmin_s; 2350 } 2351 if (pcbddc->dbg_flag) { 2352 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2353 } 2354 if (sub_schurs->is_hermitian) { 2355 PetscInt j,k; 2356 for (j=0;j<subset_size;j++) { 2357 for (k=j;k<subset_size;k++) { 2358 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2359 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2360 } 2361 } 2362 } else { 2363 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2364 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2365 } 2366 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2367 #if defined(PETSC_USE_COMPLEX) 2368 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2369 #else 2370 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2371 #endif 2372 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2373 B_neigs += B_neigs2; 2374 } 2375 if (B_ierr) { 2376 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2377 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2378 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2379 } 2380 if (pcbddc->dbg_flag) { 2381 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2382 for (j=0;j<B_neigs;j++) { 2383 if (eigs[j] == 0.0) { 2384 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2385 } else { 2386 if (pcbddc->use_deluxe_scaling) { 2387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2388 } else { 2389 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2390 } 2391 } 2392 } 2393 } 2394 } else { 2395 /* TODO */ 2396 } 2397 } 2398 /* change the basis back to the original one */ 2399 if (sub_schurs->change) { 2400 Mat change,phi,phit; 2401 2402 if (pcbddc->dbg_flag > 1) { 2403 PetscInt ii; 2404 for (ii=0;ii<B_neigs;ii++) { 2405 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2406 for (j=0;j<B_N;j++) { 2407 #if defined(PETSC_USE_COMPLEX) 2408 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2409 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2411 #else 2412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2413 #endif 2414 } 2415 } 2416 } 2417 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2418 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2419 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2420 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2421 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2422 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2423 } 2424 maxneigs = PetscMax(B_neigs,maxneigs); 2425 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2426 if (B_neigs) { 2427 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2428 2429 if (pcbddc->dbg_flag > 1) { 2430 PetscInt ii; 2431 for (ii=0;ii<B_neigs;ii++) { 2432 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2433 for (j=0;j<B_N;j++) { 2434 #if defined(PETSC_USE_COMPLEX) 2435 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2436 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2437 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2438 #else 2439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2440 #endif 2441 } 2442 } 2443 } 2444 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2445 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2446 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2447 cum++; 2448 } 2449 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2450 /* shift for next computation */ 2451 cumarray += subset_size*subset_size; 2452 } 2453 if (pcbddc->dbg_flag) { 2454 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2455 } 2456 2457 if (mss) { 2458 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2459 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2460 /* destroy matrices (junk) */ 2461 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2462 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2463 } 2464 if (allocated_S_St) { 2465 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2466 } 2467 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2468 #if defined(PETSC_USE_COMPLEX) 2469 ierr = PetscFree(rwork);CHKERRQ(ierr); 2470 #endif 2471 if (pcbddc->dbg_flag) { 2472 PetscInt maxneigs_r; 2473 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2474 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2475 } 2476 PetscFunctionReturn(0); 2477 } 2478 2479 #undef __FUNCT__ 2480 #define __FUNCT__ "PCBDDCSetUpSolvers" 2481 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2482 { 2483 PetscScalar *coarse_submat_vals; 2484 PetscErrorCode ierr; 2485 2486 PetscFunctionBegin; 2487 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2488 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2489 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2490 2491 /* Setup local neumann solver ksp_R */ 2492 /* PCBDDCSetUpLocalScatters should be called first! */ 2493 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2494 2495 /* 2496 Setup local correction and local part of coarse basis. 2497 Gives back the dense local part of the coarse matrix in column major ordering 2498 */ 2499 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2500 2501 /* Compute total number of coarse nodes and setup coarse solver */ 2502 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2503 2504 /* free */ 2505 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "PCBDDCResetCustomization" 2511 PetscErrorCode PCBDDCResetCustomization(PC pc) 2512 { 2513 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2514 PetscErrorCode ierr; 2515 2516 PetscFunctionBegin; 2517 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2518 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2519 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2520 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2521 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2522 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2523 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2524 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2525 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2526 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "PCBDDCResetTopography" 2532 PetscErrorCode PCBDDCResetTopography(PC pc) 2533 { 2534 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2535 PetscInt i; 2536 PetscErrorCode ierr; 2537 2538 PetscFunctionBegin; 2539 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2540 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2541 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2542 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2543 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2544 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2545 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2546 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2547 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2548 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2549 pcbddc->graphanalyzed = PETSC_FALSE; 2550 for (i=0;i<pcbddc->n_local_subs;i++) { 2551 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2552 } 2553 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2554 if (pcbddc->sub_schurs) { 2555 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "PCBDDCResetSolvers" 2562 PetscErrorCode PCBDDCResetSolvers(PC pc) 2563 { 2564 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2569 if (pcbddc->coarse_phi_B) { 2570 PetscScalar *array; 2571 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2572 ierr = PetscFree(array);CHKERRQ(ierr); 2573 } 2574 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2575 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2576 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2577 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2578 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2579 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2580 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2581 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2582 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2583 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2584 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2585 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2586 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2587 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2588 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2589 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2590 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2591 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2592 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2593 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2594 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2595 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2596 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2597 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2598 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2599 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2600 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2601 if (pcbddc->benign_zerodiag_subs) { 2602 PetscInt i; 2603 for (i=0;i<pcbddc->benign_n;i++) { 2604 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2605 } 2606 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2607 } 2608 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2609 PetscFunctionReturn(0); 2610 } 2611 2612 #undef __FUNCT__ 2613 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2614 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2615 { 2616 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2617 PC_IS *pcis = (PC_IS*)pc->data; 2618 VecType impVecType; 2619 PetscInt n_constraints,n_R,old_size; 2620 PetscErrorCode ierr; 2621 2622 PetscFunctionBegin; 2623 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2624 /* get sizes */ 2625 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2626 n_R = pcis->n - pcbddc->n_vertices; 2627 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2628 /* local work vectors (try to avoid unneeded work)*/ 2629 /* R nodes */ 2630 old_size = -1; 2631 if (pcbddc->vec1_R) { 2632 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2633 } 2634 if (n_R != old_size) { 2635 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2636 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2637 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2638 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2639 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2640 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2641 } 2642 /* local primal dofs */ 2643 old_size = -1; 2644 if (pcbddc->vec1_P) { 2645 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2646 } 2647 if (pcbddc->local_primal_size != old_size) { 2648 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2649 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2650 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2651 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2652 } 2653 /* local explicit constraints */ 2654 old_size = -1; 2655 if (pcbddc->vec1_C) { 2656 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2657 } 2658 if (n_constraints && n_constraints != old_size) { 2659 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2660 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2661 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2662 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2663 } 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "PCBDDCSetUpCorrection" 2669 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2670 { 2671 PetscErrorCode ierr; 2672 /* pointers to pcis and pcbddc */ 2673 PC_IS* pcis = (PC_IS*)pc->data; 2674 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2675 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2676 /* submatrices of local problem */ 2677 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2678 /* submatrices of local coarse problem */ 2679 Mat S_VV,S_CV,S_VC,S_CC; 2680 /* working matrices */ 2681 Mat C_CR; 2682 /* additional working stuff */ 2683 PC pc_R; 2684 Mat F; 2685 Vec dummy_vec; 2686 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2687 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2688 PetscScalar *work; 2689 PetscInt *idx_V_B; 2690 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2691 PetscInt i,n_R,n_D,n_B; 2692 2693 /* some shortcuts to scalars */ 2694 PetscScalar one=1.0,m_one=-1.0; 2695 2696 PetscFunctionBegin; 2697 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2698 2699 /* Set Non-overlapping dimensions */ 2700 n_vertices = pcbddc->n_vertices; 2701 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2702 n_B = pcis->n_B; 2703 n_D = pcis->n - n_B; 2704 n_R = pcis->n - n_vertices; 2705 2706 /* vertices in boundary numbering */ 2707 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2708 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2709 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2710 2711 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2712 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2713 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2714 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2716 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2717 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2718 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2719 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2720 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2721 2722 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2723 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2724 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2725 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2726 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2727 lda_rhs = n_R; 2728 need_benign_correction = PETSC_FALSE; 2729 if (isLU || isILU || isCHOL) { 2730 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2731 } else if (sub_schurs && sub_schurs->reuse_solver) { 2732 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2733 MatFactorType type; 2734 2735 F = reuse_solver->F; 2736 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2737 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2738 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2739 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2740 } else { 2741 F = NULL; 2742 } 2743 2744 /* allocate workspace */ 2745 n = 0; 2746 if (n_constraints) { 2747 n += lda_rhs*n_constraints; 2748 } 2749 if (n_vertices) { 2750 n = PetscMax(2*lda_rhs*n_vertices,n); 2751 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2752 } 2753 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2754 2755 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2756 dummy_vec = NULL; 2757 if (need_benign_correction && lda_rhs != n_R && F) { 2758 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2759 } 2760 2761 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2762 if (n_constraints) { 2763 Mat M1,M2,M3,C_B; 2764 IS is_aux; 2765 PetscScalar *array,*array2; 2766 2767 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2768 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2769 2770 /* Extract constraints on R nodes: C_{CR} */ 2771 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2772 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2773 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2774 2775 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2776 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2777 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2778 for (i=0;i<n_constraints;i++) { 2779 const PetscScalar *row_cmat_values; 2780 const PetscInt *row_cmat_indices; 2781 PetscInt size_of_constraint,j; 2782 2783 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2784 for (j=0;j<size_of_constraint;j++) { 2785 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 2786 } 2787 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2788 } 2789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 2790 if (F) { 2791 Mat B; 2792 2793 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2794 if (need_benign_correction) { 2795 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2796 2797 /* rhs is already zero on interior dofs, no need to change the rhs */ 2798 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 2799 } 2800 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 2801 if (need_benign_correction) { 2802 PetscScalar *marr; 2803 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2804 2805 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2806 if (lda_rhs != n_R) { 2807 for (i=0;i<n_constraints;i++) { 2808 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2809 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2810 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2811 } 2812 } else { 2813 for (i=0;i<n_constraints;i++) { 2814 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2815 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2816 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2817 } 2818 } 2819 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2820 } 2821 ierr = MatDestroy(&B);CHKERRQ(ierr); 2822 } else { 2823 PetscScalar *marr; 2824 2825 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2826 for (i=0;i<n_constraints;i++) { 2827 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2828 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 2829 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2830 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2831 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2832 } 2833 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2834 } 2835 if (!pcbddc->switch_static) { 2836 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2837 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2838 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2839 for (i=0;i<n_constraints;i++) { 2840 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 2841 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 2842 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2843 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2845 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2846 } 2847 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2848 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2849 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2850 } else { 2851 if (lda_rhs != n_R) { 2852 IS dummy; 2853 2854 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 2855 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2856 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 2857 } else { 2858 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 2859 pcbddc->local_auxmat2 = local_auxmat2_R; 2860 } 2861 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2862 } 2863 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2864 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2865 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 2866 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 2867 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 2868 if (isCHOL) { 2869 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2870 } else { 2871 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2872 } 2873 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2874 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2875 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2876 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2877 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2878 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2879 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2880 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2881 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2882 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2883 } 2884 2885 /* Get submatrices from subdomain matrix */ 2886 if (n_vertices) { 2887 IS is_aux; 2888 2889 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2890 IS tis; 2891 2892 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2893 ierr = ISSort(tis);CHKERRQ(ierr); 2894 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2895 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2896 } else { 2897 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2898 } 2899 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2900 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2901 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2902 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2903 } 2904 2905 /* Matrix of coarse basis functions (local) */ 2906 if (pcbddc->coarse_phi_B) { 2907 PetscInt on_B,on_primal,on_D=n_D; 2908 if (pcbddc->coarse_phi_D) { 2909 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2910 } 2911 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2912 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2913 PetscScalar *marray; 2914 2915 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2916 ierr = PetscFree(marray);CHKERRQ(ierr); 2917 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2918 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2919 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2920 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2921 } 2922 } 2923 2924 if (!pcbddc->coarse_phi_B) { 2925 PetscScalar *marray; 2926 2927 n = n_B*pcbddc->local_primal_size; 2928 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2929 n += n_D*pcbddc->local_primal_size; 2930 } 2931 if (!pcbddc->symmetric_primal) { 2932 n *= 2; 2933 } 2934 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2936 n = n_B*pcbddc->local_primal_size; 2937 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2939 n += n_D*pcbddc->local_primal_size; 2940 } 2941 if (!pcbddc->symmetric_primal) { 2942 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2943 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2944 n = n_B*pcbddc->local_primal_size; 2945 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2946 } 2947 } else { 2948 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2949 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2950 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2951 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2952 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2953 } 2954 } 2955 } 2956 2957 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2958 p0_lidx_I = NULL; 2959 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2960 const PetscInt *idxs; 2961 2962 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2963 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2964 for (i=0;i<pcbddc->benign_n;i++) { 2965 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2966 } 2967 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2968 } 2969 2970 /* vertices */ 2971 if (n_vertices) { 2972 2973 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2974 2975 if (n_R) { 2976 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2977 PetscBLASInt B_N,B_one = 1; 2978 PetscScalar *x,*y; 2979 PetscBool isseqaij; 2980 2981 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2982 if (need_benign_correction) { 2983 ISLocalToGlobalMapping RtoN; 2984 IS is_p0; 2985 PetscInt *idxs_p0,n; 2986 2987 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2988 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2989 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2990 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2991 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2992 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2993 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2994 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2995 } 2996 2997 if (lda_rhs == n_R) { 2998 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2999 } else { 3000 PetscScalar *av,*array; 3001 const PetscInt *xadj,*adjncy; 3002 PetscInt n; 3003 PetscBool flg_row; 3004 3005 array = work+lda_rhs*n_vertices; 3006 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3007 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3008 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3009 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3010 for (i=0;i<n;i++) { 3011 PetscInt j; 3012 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3013 } 3014 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3015 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3016 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3017 } 3018 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3019 if (need_benign_correction) { 3020 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3021 PetscScalar *marr; 3022 3023 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3024 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3025 3026 | 0 0 0 | (V) 3027 L = | 0 0 -1 | (P-p0) 3028 | 0 0 -1 | (p0) 3029 3030 */ 3031 for (i=0;i<reuse_solver->benign_n;i++) { 3032 const PetscScalar *vals; 3033 const PetscInt *idxs,*idxs_zero; 3034 PetscInt n,j,nz; 3035 3036 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3037 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3038 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3039 for (j=0;j<n;j++) { 3040 PetscScalar val = vals[j]; 3041 PetscInt k,col = idxs[j]; 3042 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3043 } 3044 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3045 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3046 } 3047 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3048 } 3049 if (F) { 3050 /* need to correct the rhs */ 3051 if (need_benign_correction) { 3052 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3053 PetscScalar *marr; 3054 3055 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3056 if (lda_rhs != n_R) { 3057 for (i=0;i<n_vertices;i++) { 3058 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3059 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3060 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3061 } 3062 } else { 3063 for (i=0;i<n_vertices;i++) { 3064 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3065 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3066 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3067 } 3068 } 3069 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3070 } 3071 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3072 /* need to correct the solution */ 3073 if (need_benign_correction) { 3074 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3075 PetscScalar *marr; 3076 3077 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3078 if (lda_rhs != n_R) { 3079 for (i=0;i<n_vertices;i++) { 3080 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3081 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3082 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3083 } 3084 } else { 3085 for (i=0;i<n_vertices;i++) { 3086 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3087 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3088 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3089 } 3090 } 3091 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3092 } 3093 } else { 3094 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3095 for (i=0;i<n_vertices;i++) { 3096 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3097 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3098 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3099 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3100 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3101 } 3102 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3103 } 3104 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3105 /* S_VV and S_CV */ 3106 if (n_constraints) { 3107 Mat B; 3108 3109 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3110 for (i=0;i<n_vertices;i++) { 3111 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3112 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3113 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3114 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3115 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3116 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3117 } 3118 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3119 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3120 ierr = MatDestroy(&B);CHKERRQ(ierr); 3121 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3122 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3123 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3124 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3125 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3126 ierr = MatDestroy(&B);CHKERRQ(ierr); 3127 } 3128 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3129 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3130 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3131 } 3132 if (lda_rhs != n_R) { 3133 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3134 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3135 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3136 } 3137 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3138 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3139 if (need_benign_correction) { 3140 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3141 PetscScalar *marr,*sums; 3142 3143 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3144 ierr = MatDenseGetArray(S_VVt,&marr); 3145 for (i=0;i<reuse_solver->benign_n;i++) { 3146 const PetscScalar *vals; 3147 const PetscInt *idxs,*idxs_zero; 3148 PetscInt n,j,nz; 3149 3150 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3151 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3152 for (j=0;j<n_vertices;j++) { 3153 PetscInt k; 3154 sums[j] = 0.; 3155 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3156 } 3157 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3158 for (j=0;j<n;j++) { 3159 PetscScalar val = vals[j]; 3160 PetscInt k; 3161 for (k=0;k<n_vertices;k++) { 3162 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3163 } 3164 } 3165 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3166 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3167 } 3168 ierr = PetscFree(sums);CHKERRQ(ierr); 3169 ierr = MatDenseRestoreArray(S_VVt,&marr); 3170 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3171 } 3172 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3173 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3174 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3175 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3176 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3177 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3178 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3179 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3180 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3181 } else { 3182 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3183 } 3184 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3185 3186 /* coarse basis functions */ 3187 for (i=0;i<n_vertices;i++) { 3188 PetscScalar *y; 3189 3190 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3191 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3192 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3193 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3194 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3195 y[n_B*i+idx_V_B[i]] = 1.0; 3196 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3197 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3198 3199 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3200 PetscInt j; 3201 3202 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3203 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3204 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3205 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3206 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3207 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3208 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3209 } 3210 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3211 } 3212 /* if n_R == 0 the object is not destroyed */ 3213 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3214 } 3215 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3216 3217 if (n_constraints) { 3218 Mat B; 3219 3220 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3221 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3222 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3223 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3224 if (n_vertices) { 3225 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3226 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3227 } else { 3228 Mat S_VCt; 3229 3230 if (lda_rhs != n_R) { 3231 ierr = MatDestroy(&B);CHKERRQ(ierr); 3232 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3233 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3234 } 3235 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3236 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3237 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3238 } 3239 } 3240 ierr = MatDestroy(&B);CHKERRQ(ierr); 3241 /* coarse basis functions */ 3242 for (i=0;i<n_constraints;i++) { 3243 PetscScalar *y; 3244 3245 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3246 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3247 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3248 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3249 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3250 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3251 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3252 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3253 PetscInt j; 3254 3255 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3256 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3257 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3258 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3259 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3260 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3261 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3262 } 3263 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3264 } 3265 } 3266 if (n_constraints) { 3267 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3268 } 3269 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3270 3271 /* coarse matrix entries relative to B_0 */ 3272 if (pcbddc->benign_n) { 3273 Mat B0_B,B0_BPHI; 3274 IS is_dummy; 3275 PetscScalar *data; 3276 PetscInt j; 3277 3278 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3279 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3280 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3281 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3282 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3283 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3284 for (j=0;j<pcbddc->benign_n;j++) { 3285 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3286 for (i=0;i<pcbddc->local_primal_size;i++) { 3287 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3288 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3289 } 3290 } 3291 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3292 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3293 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3294 } 3295 3296 /* compute other basis functions for non-symmetric problems */ 3297 if (!pcbddc->symmetric_primal) { 3298 Mat B_V=NULL,B_C=NULL; 3299 PetscScalar *marray; 3300 3301 if (n_constraints) { 3302 Mat S_CCT,C_CRT; 3303 3304 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3305 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3306 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3307 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3308 if (n_vertices) { 3309 Mat S_VCT; 3310 3311 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3312 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3313 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3314 } 3315 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3316 } else { 3317 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3318 } 3319 if (n_vertices && n_R) { 3320 PetscScalar *av,*marray; 3321 const PetscInt *xadj,*adjncy; 3322 PetscInt n; 3323 PetscBool flg_row; 3324 3325 /* B_V = B_V - A_VR^T */ 3326 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3327 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3328 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3329 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3330 for (i=0;i<n;i++) { 3331 PetscInt j; 3332 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3333 } 3334 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3335 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3336 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3337 } 3338 3339 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3340 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3341 for (i=0;i<n_vertices;i++) { 3342 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3343 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3344 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3345 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3346 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3347 } 3348 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3349 if (B_C) { 3350 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3351 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3352 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3353 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3354 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3355 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3356 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3357 } 3358 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3359 } 3360 /* coarse basis functions */ 3361 for (i=0;i<pcbddc->local_primal_size;i++) { 3362 PetscScalar *y; 3363 3364 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3365 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3366 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3367 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3368 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3369 if (i<n_vertices) { 3370 y[n_B*i+idx_V_B[i]] = 1.0; 3371 } 3372 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3373 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3374 3375 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3376 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3377 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3378 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3379 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3380 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3381 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3382 } 3383 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3384 } 3385 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3386 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3387 } 3388 /* free memory */ 3389 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3390 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3391 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3392 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3393 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3394 ierr = PetscFree(work);CHKERRQ(ierr); 3395 if (n_vertices) { 3396 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3397 } 3398 if (n_constraints) { 3399 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3400 } 3401 /* Checking coarse_sub_mat and coarse basis functios */ 3402 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3403 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3404 if (pcbddc->dbg_flag) { 3405 Mat coarse_sub_mat; 3406 Mat AUXMAT,TM1,TM2,TM3,TM4; 3407 Mat coarse_phi_D,coarse_phi_B; 3408 Mat coarse_psi_D,coarse_psi_B; 3409 Mat A_II,A_BB,A_IB,A_BI; 3410 Mat C_B,CPHI; 3411 IS is_dummy; 3412 Vec mones; 3413 MatType checkmattype=MATSEQAIJ; 3414 PetscReal real_value; 3415 3416 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3417 Mat A; 3418 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3419 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3420 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3421 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3422 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3423 ierr = MatDestroy(&A);CHKERRQ(ierr); 3424 } else { 3425 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3426 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3427 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3428 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3429 } 3430 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3431 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3432 if (!pcbddc->symmetric_primal) { 3433 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3434 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3435 } 3436 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3437 3438 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3439 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3440 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3441 if (!pcbddc->symmetric_primal) { 3442 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3443 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3444 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3445 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3446 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3447 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3448 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3449 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3450 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3451 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3452 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3453 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3454 } else { 3455 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3456 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3457 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3458 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3459 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3460 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3461 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3462 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3463 } 3464 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3465 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3466 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3467 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3468 if (pcbddc->benign_n) { 3469 Mat B0_B,B0_BPHI; 3470 PetscScalar *data,*data2; 3471 PetscInt j; 3472 3473 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3474 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3475 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3476 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3477 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3478 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3479 for (j=0;j<pcbddc->benign_n;j++) { 3480 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3481 for (i=0;i<pcbddc->local_primal_size;i++) { 3482 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3483 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3484 } 3485 } 3486 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3487 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3488 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3489 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3490 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3491 } 3492 #if 0 3493 { 3494 PetscViewer viewer; 3495 char filename[256]; 3496 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3497 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3498 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3499 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3500 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3501 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3502 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3503 if (save_change) { 3504 Mat phi_B; 3505 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3506 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3507 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3508 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3509 } else { 3510 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3511 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3512 } 3513 if (pcbddc->coarse_phi_D) { 3514 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3515 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3516 } 3517 if (pcbddc->coarse_psi_B) { 3518 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3519 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3520 } 3521 if (pcbddc->coarse_psi_D) { 3522 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3523 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3524 } 3525 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3526 } 3527 #endif 3528 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3529 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3530 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3531 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3532 3533 /* check constraints */ 3534 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3535 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3536 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3537 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3538 } else { 3539 PetscScalar *data; 3540 Mat tmat; 3541 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3543 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3544 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3545 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3546 } 3547 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3548 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3549 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3550 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3551 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3552 if (!pcbddc->symmetric_primal) { 3553 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3554 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3555 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3556 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3557 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3558 } 3559 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3560 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3561 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3562 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3563 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3564 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3565 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3566 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3567 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3568 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3569 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3570 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3571 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3572 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3573 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3574 if (!pcbddc->symmetric_primal) { 3575 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3576 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3577 } 3578 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3579 } 3580 /* get back data */ 3581 *coarse_submat_vals_n = coarse_submat_vals; 3582 PetscFunctionReturn(0); 3583 } 3584 3585 #undef __FUNCT__ 3586 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3587 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3588 { 3589 Mat *work_mat; 3590 IS isrow_s,iscol_s; 3591 PetscBool rsorted,csorted; 3592 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3593 PetscErrorCode ierr; 3594 3595 PetscFunctionBegin; 3596 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3597 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3598 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3599 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3600 3601 if (!rsorted) { 3602 const PetscInt *idxs; 3603 PetscInt *idxs_sorted,i; 3604 3605 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3606 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3607 for (i=0;i<rsize;i++) { 3608 idxs_perm_r[i] = i; 3609 } 3610 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3611 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3612 for (i=0;i<rsize;i++) { 3613 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3614 } 3615 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3616 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3617 } else { 3618 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3619 isrow_s = isrow; 3620 } 3621 3622 if (!csorted) { 3623 if (isrow == iscol) { 3624 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3625 iscol_s = isrow_s; 3626 } else { 3627 const PetscInt *idxs; 3628 PetscInt *idxs_sorted,i; 3629 3630 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3631 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3632 for (i=0;i<csize;i++) { 3633 idxs_perm_c[i] = i; 3634 } 3635 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3636 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3637 for (i=0;i<csize;i++) { 3638 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3639 } 3640 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3641 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3642 } 3643 } else { 3644 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3645 iscol_s = iscol; 3646 } 3647 3648 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3649 3650 if (!rsorted || !csorted) { 3651 Mat new_mat; 3652 IS is_perm_r,is_perm_c; 3653 3654 if (!rsorted) { 3655 PetscInt *idxs_r,i; 3656 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3657 for (i=0;i<rsize;i++) { 3658 idxs_r[idxs_perm_r[i]] = i; 3659 } 3660 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3661 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3662 } else { 3663 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3664 } 3665 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3666 3667 if (!csorted) { 3668 if (isrow_s == iscol_s) { 3669 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3670 is_perm_c = is_perm_r; 3671 } else { 3672 PetscInt *idxs_c,i; 3673 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3674 for (i=0;i<csize;i++) { 3675 idxs_c[idxs_perm_c[i]] = i; 3676 } 3677 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3678 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3679 } 3680 } else { 3681 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3682 } 3683 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3684 3685 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3686 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3687 work_mat[0] = new_mat; 3688 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3689 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3690 } 3691 3692 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3693 *B = work_mat[0]; 3694 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3695 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3696 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3697 PetscFunctionReturn(0); 3698 } 3699 3700 #undef __FUNCT__ 3701 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3702 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3703 { 3704 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3705 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3706 Mat new_mat; 3707 IS is_local,is_global; 3708 PetscInt local_size; 3709 PetscBool isseqaij; 3710 PetscErrorCode ierr; 3711 3712 PetscFunctionBegin; 3713 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3714 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3715 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3716 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3717 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3718 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3719 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3720 3721 /* check */ 3722 if (pcbddc->dbg_flag) { 3723 Vec x,x_change; 3724 PetscReal error; 3725 3726 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3727 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3728 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3729 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3730 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3731 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3732 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3733 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3734 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3735 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3736 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3737 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3738 ierr = VecDestroy(&x);CHKERRQ(ierr); 3739 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3740 } 3741 3742 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3743 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3744 if (isseqaij) { 3745 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3746 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3747 } else { 3748 Mat work_mat; 3749 3750 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3751 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3752 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3753 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3754 } 3755 if (matis->A->symmetric_set) { 3756 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3757 #if !defined(PETSC_USE_COMPLEX) 3758 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3759 #endif 3760 } 3761 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3762 PetscFunctionReturn(0); 3763 } 3764 3765 #undef __FUNCT__ 3766 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3767 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3768 { 3769 PC_IS* pcis = (PC_IS*)(pc->data); 3770 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3771 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3772 PetscInt *idx_R_local=NULL; 3773 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3774 PetscInt vbs,bs; 3775 PetscBT bitmask=NULL; 3776 PetscErrorCode ierr; 3777 3778 PetscFunctionBegin; 3779 /* 3780 No need to setup local scatters if 3781 - primal space is unchanged 3782 AND 3783 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 3784 AND 3785 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 3786 */ 3787 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 3788 PetscFunctionReturn(0); 3789 } 3790 /* destroy old objects */ 3791 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3792 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3793 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3794 /* Set Non-overlapping dimensions */ 3795 n_B = pcis->n_B; 3796 n_D = pcis->n - n_B; 3797 n_vertices = pcbddc->n_vertices; 3798 3799 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 3800 3801 /* create auxiliary bitmask and allocate workspace */ 3802 if (!sub_schurs || !sub_schurs->reuse_solver) { 3803 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 3804 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 3805 for (i=0;i<n_vertices;i++) { 3806 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 3807 } 3808 3809 for (i=0, n_R=0; i<pcis->n; i++) { 3810 if (!PetscBTLookup(bitmask,i)) { 3811 idx_R_local[n_R++] = i; 3812 } 3813 } 3814 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 3815 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3816 3817 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3818 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 3819 } 3820 3821 /* Block code */ 3822 vbs = 1; 3823 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 3824 if (bs>1 && !(n_vertices%bs)) { 3825 PetscBool is_blocked = PETSC_TRUE; 3826 PetscInt *vary; 3827 if (!sub_schurs || !sub_schurs->reuse_solver) { 3828 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 3829 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 3830 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 3831 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 3832 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 3833 for (i=0; i<pcis->n/bs; i++) { 3834 if (vary[i]!=0 && vary[i]!=bs) { 3835 is_blocked = PETSC_FALSE; 3836 break; 3837 } 3838 } 3839 ierr = PetscFree(vary);CHKERRQ(ierr); 3840 } else { 3841 /* Verify directly the R set */ 3842 for (i=0; i<n_R/bs; i++) { 3843 PetscInt j,node=idx_R_local[bs*i]; 3844 for (j=1; j<bs; j++) { 3845 if (node != idx_R_local[bs*i+j]-j) { 3846 is_blocked = PETSC_FALSE; 3847 break; 3848 } 3849 } 3850 } 3851 } 3852 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 3853 vbs = bs; 3854 for (i=0;i<n_R/vbs;i++) { 3855 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 3856 } 3857 } 3858 } 3859 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 3860 if (sub_schurs && sub_schurs->reuse_solver) { 3861 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3862 3863 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3864 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 3865 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 3866 reuse_solver->is_R = pcbddc->is_R_local; 3867 } else { 3868 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3869 } 3870 3871 /* print some info if requested */ 3872 if (pcbddc->dbg_flag) { 3873 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3874 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3875 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3876 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3879 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3880 } 3881 3882 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3883 if (!sub_schurs || !sub_schurs->reuse_solver) { 3884 IS is_aux1,is_aux2; 3885 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3886 3887 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3888 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3889 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3890 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3891 for (i=0; i<n_D; i++) { 3892 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3893 } 3894 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3895 for (i=0, j=0; i<n_R; i++) { 3896 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3897 aux_array1[j++] = i; 3898 } 3899 } 3900 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3901 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3902 for (i=0, j=0; i<n_B; i++) { 3903 if (!PetscBTLookup(bitmask,is_indices[i])) { 3904 aux_array2[j++] = i; 3905 } 3906 } 3907 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3908 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3909 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3910 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3911 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3912 3913 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3914 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3915 for (i=0, j=0; i<n_R; i++) { 3916 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3917 aux_array1[j++] = i; 3918 } 3919 } 3920 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3921 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3922 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3923 } 3924 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3925 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3926 } else { 3927 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3928 IS tis; 3929 PetscInt schur_size; 3930 3931 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3932 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3933 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3934 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3935 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3936 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3937 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3938 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3939 } 3940 } 3941 PetscFunctionReturn(0); 3942 } 3943 3944 3945 #undef __FUNCT__ 3946 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3947 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3948 { 3949 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3950 PC_IS *pcis = (PC_IS*)pc->data; 3951 PC pc_temp; 3952 Mat A_RR; 3953 MatReuse reuse; 3954 PetscScalar m_one = -1.0; 3955 PetscReal value; 3956 PetscInt n_D,n_R; 3957 PetscBool check_corr[2],issbaij; 3958 PetscErrorCode ierr; 3959 /* prefixes stuff */ 3960 char dir_prefix[256],neu_prefix[256],str_level[16]; 3961 size_t len; 3962 3963 PetscFunctionBegin; 3964 3965 /* compute prefixes */ 3966 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3967 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3968 if (!pcbddc->current_level) { 3969 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3970 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3971 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3972 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3973 } else { 3974 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3975 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3976 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3977 len -= 15; /* remove "pc_bddc_coarse_" */ 3978 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3979 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3980 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3981 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3982 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3983 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3984 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3985 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3986 } 3987 3988 /* DIRICHLET PROBLEM */ 3989 if (dirichlet) { 3990 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3991 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3992 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3993 if (pcbddc->dbg_flag) { 3994 Mat A_IIn; 3995 3996 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3997 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3998 pcis->A_II = A_IIn; 3999 } 4000 } 4001 if (pcbddc->local_mat->symmetric_set) { 4002 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4003 } 4004 /* Matrix for Dirichlet problem is pcis->A_II */ 4005 n_D = pcis->n - pcis->n_B; 4006 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4007 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4008 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4009 /* default */ 4010 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4011 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4012 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4013 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4014 if (issbaij) { 4015 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4016 } else { 4017 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4018 } 4019 /* Allow user's customization */ 4020 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4021 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4022 } 4023 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4024 if (sub_schurs && sub_schurs->reuse_solver) { 4025 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4026 4027 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4028 } 4029 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4030 if (!n_D) { 4031 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4032 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4033 } 4034 /* Set Up KSP for Dirichlet problem of BDDC */ 4035 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4036 /* set ksp_D into pcis data */ 4037 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4038 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4039 pcis->ksp_D = pcbddc->ksp_D; 4040 } 4041 4042 /* NEUMANN PROBLEM */ 4043 A_RR = 0; 4044 if (neumann) { 4045 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4046 PetscInt ibs,mbs; 4047 PetscBool issbaij; 4048 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4049 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4050 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4051 if (pcbddc->ksp_R) { /* already created ksp */ 4052 PetscInt nn_R; 4053 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4054 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4055 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4056 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4057 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4058 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4059 reuse = MAT_INITIAL_MATRIX; 4060 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4061 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4062 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4063 reuse = MAT_INITIAL_MATRIX; 4064 } else { /* safe to reuse the matrix */ 4065 reuse = MAT_REUSE_MATRIX; 4066 } 4067 } 4068 /* last check */ 4069 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4070 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4071 reuse = MAT_INITIAL_MATRIX; 4072 } 4073 } else { /* first time, so we need to create the matrix */ 4074 reuse = MAT_INITIAL_MATRIX; 4075 } 4076 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4077 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4078 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4079 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4080 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4081 if (matis->A == pcbddc->local_mat) { 4082 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4083 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4084 } else { 4085 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4086 } 4087 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4088 if (matis->A == pcbddc->local_mat) { 4089 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4090 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4091 } else { 4092 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4093 } 4094 } 4095 /* extract A_RR */ 4096 if (sub_schurs && sub_schurs->reuse_solver) { 4097 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4098 4099 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4100 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4101 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4102 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4103 } else { 4104 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4105 } 4106 } else { 4107 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4108 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4109 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4110 } 4111 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4112 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4113 } 4114 if (pcbddc->local_mat->symmetric_set) { 4115 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4116 } 4117 if (!pcbddc->ksp_R) { /* create object if not present */ 4118 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4119 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4120 /* default */ 4121 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4122 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4123 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4124 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4125 if (issbaij) { 4126 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4127 } else { 4128 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4129 } 4130 /* Allow user's customization */ 4131 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4132 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4133 } 4134 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4135 if (!n_R) { 4136 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4137 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4138 } 4139 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4140 /* Reuse solver if it is present */ 4141 if (sub_schurs && sub_schurs->reuse_solver) { 4142 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4143 4144 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4145 } 4146 /* Set Up KSP for Neumann problem of BDDC */ 4147 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4148 } 4149 4150 if (pcbddc->dbg_flag) { 4151 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4152 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4154 } 4155 4156 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4157 check_corr[0] = check_corr[1] = PETSC_FALSE; 4158 if (pcbddc->NullSpace_corr[0]) { 4159 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4160 } 4161 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4162 check_corr[0] = PETSC_TRUE; 4163 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4164 } 4165 if (neumann && pcbddc->NullSpace_corr[2]) { 4166 check_corr[1] = PETSC_TRUE; 4167 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4168 } 4169 4170 /* check Dirichlet and Neumann solvers */ 4171 if (pcbddc->dbg_flag) { 4172 if (dirichlet) { /* Dirichlet */ 4173 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4174 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4175 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4176 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4177 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4178 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4179 if (check_corr[0]) { 4180 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4181 } 4182 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4183 } 4184 if (neumann) { /* Neumann */ 4185 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4186 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4187 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4188 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4189 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4190 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4191 if (check_corr[1]) { 4192 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4193 } 4194 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4195 } 4196 } 4197 /* free Neumann problem's matrix */ 4198 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4199 PetscFunctionReturn(0); 4200 } 4201 4202 #undef __FUNCT__ 4203 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4204 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4205 { 4206 PetscErrorCode ierr; 4207 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4208 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4209 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4210 4211 PetscFunctionBegin; 4212 if (!reuse_solver) { 4213 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4214 } 4215 if (!pcbddc->switch_static) { 4216 if (applytranspose && pcbddc->local_auxmat1) { 4217 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4218 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4219 } 4220 if (!reuse_solver) { 4221 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4222 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4223 } else { 4224 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4225 4226 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4227 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4228 } 4229 } else { 4230 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4231 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4232 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4233 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4234 if (applytranspose && pcbddc->local_auxmat1) { 4235 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4236 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4237 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4238 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4239 } 4240 } 4241 if (!reuse_solver || pcbddc->switch_static) { 4242 if (applytranspose) { 4243 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4244 } else { 4245 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4246 } 4247 } else { 4248 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4249 4250 if (applytranspose) { 4251 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4252 } else { 4253 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4254 } 4255 } 4256 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4257 if (!pcbddc->switch_static) { 4258 if (!reuse_solver) { 4259 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4260 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4261 } else { 4262 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4263 4264 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4265 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4266 } 4267 if (!applytranspose && pcbddc->local_auxmat1) { 4268 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4269 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4270 } 4271 } else { 4272 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4273 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4274 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4275 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4276 if (!applytranspose && pcbddc->local_auxmat1) { 4277 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4278 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4279 } 4280 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4281 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4282 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4283 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4284 } 4285 PetscFunctionReturn(0); 4286 } 4287 4288 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4289 #undef __FUNCT__ 4290 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4291 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4292 { 4293 PetscErrorCode ierr; 4294 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4295 PC_IS* pcis = (PC_IS*) (pc->data); 4296 const PetscScalar zero = 0.0; 4297 4298 PetscFunctionBegin; 4299 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4300 if (!pcbddc->benign_apply_coarse_only) { 4301 if (applytranspose) { 4302 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4303 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4304 } else { 4305 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4306 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4307 } 4308 } else { 4309 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4310 } 4311 4312 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4313 if (pcbddc->benign_n) { 4314 PetscScalar *array; 4315 PetscInt j; 4316 4317 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4318 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4319 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4320 } 4321 4322 /* start communications from local primal nodes to rhs of coarse solver */ 4323 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4324 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4325 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4326 4327 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4328 if (pcbddc->coarse_ksp) { 4329 Mat coarse_mat; 4330 Vec rhs,sol; 4331 MatNullSpace nullsp; 4332 PetscBool isbddc = PETSC_FALSE; 4333 4334 if (pcbddc->benign_have_null) { 4335 PC coarse_pc; 4336 4337 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4338 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4339 /* we need to propagate to coarser levels the need for a possible benign correction */ 4340 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4341 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4342 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4343 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4344 } 4345 } 4346 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4347 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4348 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4349 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4350 if (nullsp) { 4351 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4352 } 4353 if (applytranspose) { 4354 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4355 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4356 } else { 4357 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4358 PC coarse_pc; 4359 4360 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4361 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4362 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4363 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4364 } else { 4365 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4366 } 4367 } 4368 /* we don't need the benign correction at coarser levels anymore */ 4369 if (pcbddc->benign_have_null && isbddc) { 4370 PC coarse_pc; 4371 PC_BDDC* coarsepcbddc; 4372 4373 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4374 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4375 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4376 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4377 } 4378 if (nullsp) { 4379 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4380 } 4381 } 4382 4383 /* Local solution on R nodes */ 4384 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4385 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4386 } 4387 /* communications from coarse sol to local primal nodes */ 4388 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4389 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4390 4391 /* Sum contributions from the two levels */ 4392 if (!pcbddc->benign_apply_coarse_only) { 4393 if (applytranspose) { 4394 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4395 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4396 } else { 4397 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4398 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4399 } 4400 /* store p0 */ 4401 if (pcbddc->benign_n) { 4402 PetscScalar *array; 4403 PetscInt j; 4404 4405 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4406 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4407 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4408 } 4409 } else { /* expand the coarse solution */ 4410 if (applytranspose) { 4411 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4412 } else { 4413 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4414 } 4415 } 4416 PetscFunctionReturn(0); 4417 } 4418 4419 #undef __FUNCT__ 4420 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4421 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4422 { 4423 PetscErrorCode ierr; 4424 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4425 PetscScalar *array; 4426 Vec from,to; 4427 4428 PetscFunctionBegin; 4429 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4430 from = pcbddc->coarse_vec; 4431 to = pcbddc->vec1_P; 4432 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4433 Vec tvec; 4434 4435 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4436 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4437 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4438 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4439 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4440 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4441 } 4442 } else { /* from local to global -> put data in coarse right hand side */ 4443 from = pcbddc->vec1_P; 4444 to = pcbddc->coarse_vec; 4445 } 4446 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4447 PetscFunctionReturn(0); 4448 } 4449 4450 #undef __FUNCT__ 4451 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4452 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4453 { 4454 PetscErrorCode ierr; 4455 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4456 PetscScalar *array; 4457 Vec from,to; 4458 4459 PetscFunctionBegin; 4460 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4461 from = pcbddc->coarse_vec; 4462 to = pcbddc->vec1_P; 4463 } else { /* from local to global -> put data in coarse right hand side */ 4464 from = pcbddc->vec1_P; 4465 to = pcbddc->coarse_vec; 4466 } 4467 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4468 if (smode == SCATTER_FORWARD) { 4469 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4470 Vec tvec; 4471 4472 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4473 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4474 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4475 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4476 } 4477 } else { 4478 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4479 ierr = VecResetArray(from);CHKERRQ(ierr); 4480 } 4481 } 4482 PetscFunctionReturn(0); 4483 } 4484 4485 /* uncomment for testing purposes */ 4486 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4487 #undef __FUNCT__ 4488 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4489 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4490 { 4491 PetscErrorCode ierr; 4492 PC_IS* pcis = (PC_IS*)(pc->data); 4493 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4494 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4495 /* one and zero */ 4496 PetscScalar one=1.0,zero=0.0; 4497 /* space to store constraints and their local indices */ 4498 PetscScalar *constraints_data; 4499 PetscInt *constraints_idxs,*constraints_idxs_B; 4500 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4501 PetscInt *constraints_n; 4502 /* iterators */ 4503 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4504 /* BLAS integers */ 4505 PetscBLASInt lwork,lierr; 4506 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4507 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4508 /* reuse */ 4509 PetscInt olocal_primal_size,olocal_primal_size_cc; 4510 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4511 /* change of basis */ 4512 PetscBool qr_needed; 4513 PetscBT change_basis,qr_needed_idx; 4514 /* auxiliary stuff */ 4515 PetscInt *nnz,*is_indices; 4516 PetscInt ncc; 4517 /* some quantities */ 4518 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4519 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4520 4521 PetscFunctionBegin; 4522 /* Destroy Mat objects computed previously */ 4523 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4524 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4525 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4526 /* save info on constraints from previous setup (if any) */ 4527 olocal_primal_size = pcbddc->local_primal_size; 4528 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4529 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4530 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4531 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4532 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4533 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4534 4535 if (!pcbddc->adaptive_selection) { 4536 IS ISForVertices,*ISForFaces,*ISForEdges; 4537 MatNullSpace nearnullsp; 4538 const Vec *nearnullvecs; 4539 Vec *localnearnullsp; 4540 PetscScalar *array; 4541 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4542 PetscBool nnsp_has_cnst; 4543 /* LAPACK working arrays for SVD or POD */ 4544 PetscBool skip_lapack,boolforchange; 4545 PetscScalar *work; 4546 PetscReal *singular_vals; 4547 #if defined(PETSC_USE_COMPLEX) 4548 PetscReal *rwork; 4549 #endif 4550 #if defined(PETSC_MISSING_LAPACK_GESVD) 4551 PetscScalar *temp_basis,*correlation_mat; 4552 #else 4553 PetscBLASInt dummy_int=1; 4554 PetscScalar dummy_scalar=1.; 4555 #endif 4556 4557 /* Get index sets for faces, edges and vertices from graph */ 4558 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4559 /* print some info */ 4560 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4561 PetscInt nv; 4562 4563 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4564 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4565 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4567 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4569 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4570 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4571 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4572 } 4573 4574 /* free unneeded index sets */ 4575 if (!pcbddc->use_vertices) { 4576 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4577 } 4578 if (!pcbddc->use_edges) { 4579 for (i=0;i<n_ISForEdges;i++) { 4580 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4581 } 4582 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4583 n_ISForEdges = 0; 4584 } 4585 if (!pcbddc->use_faces) { 4586 for (i=0;i<n_ISForFaces;i++) { 4587 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4588 } 4589 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4590 n_ISForFaces = 0; 4591 } 4592 4593 /* check if near null space is attached to global mat */ 4594 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4595 if (nearnullsp) { 4596 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4597 /* remove any stored info */ 4598 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4599 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4600 /* store information for BDDC solver reuse */ 4601 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4602 pcbddc->onearnullspace = nearnullsp; 4603 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4604 for (i=0;i<nnsp_size;i++) { 4605 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4606 } 4607 } else { /* if near null space is not provided BDDC uses constants by default */ 4608 nnsp_size = 0; 4609 nnsp_has_cnst = PETSC_TRUE; 4610 } 4611 /* get max number of constraints on a single cc */ 4612 max_constraints = nnsp_size; 4613 if (nnsp_has_cnst) max_constraints++; 4614 4615 /* 4616 Evaluate maximum storage size needed by the procedure 4617 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4618 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4619 There can be multiple constraints per connected component 4620 */ 4621 n_vertices = 0; 4622 if (ISForVertices) { 4623 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4624 } 4625 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4626 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4627 4628 total_counts = n_ISForFaces+n_ISForEdges; 4629 total_counts *= max_constraints; 4630 total_counts += n_vertices; 4631 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4632 4633 total_counts = 0; 4634 max_size_of_constraint = 0; 4635 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4636 IS used_is; 4637 if (i<n_ISForEdges) { 4638 used_is = ISForEdges[i]; 4639 } else { 4640 used_is = ISForFaces[i-n_ISForEdges]; 4641 } 4642 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4643 total_counts += j; 4644 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4645 } 4646 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4647 4648 /* get local part of global near null space vectors */ 4649 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4650 for (k=0;k<nnsp_size;k++) { 4651 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4652 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4653 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4654 } 4655 4656 /* whether or not to skip lapack calls */ 4657 skip_lapack = PETSC_TRUE; 4658 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4659 4660 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4661 if (!skip_lapack) { 4662 PetscScalar temp_work; 4663 4664 #if defined(PETSC_MISSING_LAPACK_GESVD) 4665 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4666 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4667 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4668 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4669 #if defined(PETSC_USE_COMPLEX) 4670 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4671 #endif 4672 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4673 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4674 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4675 lwork = -1; 4676 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4677 #if !defined(PETSC_USE_COMPLEX) 4678 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4679 #else 4680 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4681 #endif 4682 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4683 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4684 #else /* on missing GESVD */ 4685 /* SVD */ 4686 PetscInt max_n,min_n; 4687 max_n = max_size_of_constraint; 4688 min_n = max_constraints; 4689 if (max_size_of_constraint < max_constraints) { 4690 min_n = max_size_of_constraint; 4691 max_n = max_constraints; 4692 } 4693 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4694 #if defined(PETSC_USE_COMPLEX) 4695 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4696 #endif 4697 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4698 lwork = -1; 4699 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4700 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4701 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4702 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4703 #if !defined(PETSC_USE_COMPLEX) 4704 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4705 #else 4706 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4707 #endif 4708 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4709 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4710 #endif /* on missing GESVD */ 4711 /* Allocate optimal workspace */ 4712 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4713 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4714 } 4715 /* Now we can loop on constraining sets */ 4716 total_counts = 0; 4717 constraints_idxs_ptr[0] = 0; 4718 constraints_data_ptr[0] = 0; 4719 /* vertices */ 4720 if (n_vertices) { 4721 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4722 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4723 for (i=0;i<n_vertices;i++) { 4724 constraints_n[total_counts] = 1; 4725 constraints_data[total_counts] = 1.0; 4726 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4727 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4728 total_counts++; 4729 } 4730 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4731 n_vertices = total_counts; 4732 } 4733 4734 /* edges and faces */ 4735 total_counts_cc = total_counts; 4736 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4737 IS used_is; 4738 PetscBool idxs_copied = PETSC_FALSE; 4739 4740 if (ncc<n_ISForEdges) { 4741 used_is = ISForEdges[ncc]; 4742 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4743 } else { 4744 used_is = ISForFaces[ncc-n_ISForEdges]; 4745 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4746 } 4747 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4748 4749 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4750 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4751 /* change of basis should not be performed on local periodic nodes */ 4752 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4753 if (nnsp_has_cnst) { 4754 PetscScalar quad_value; 4755 4756 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4757 idxs_copied = PETSC_TRUE; 4758 4759 if (!pcbddc->use_nnsp_true) { 4760 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4761 } else { 4762 quad_value = 1.0; 4763 } 4764 for (j=0;j<size_of_constraint;j++) { 4765 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4766 } 4767 temp_constraints++; 4768 total_counts++; 4769 } 4770 for (k=0;k<nnsp_size;k++) { 4771 PetscReal real_value; 4772 PetscScalar *ptr_to_data; 4773 4774 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4775 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4776 for (j=0;j<size_of_constraint;j++) { 4777 ptr_to_data[j] = array[is_indices[j]]; 4778 } 4779 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4780 /* check if array is null on the connected component */ 4781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4782 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 4783 if (real_value > 0.0) { /* keep indices and values */ 4784 temp_constraints++; 4785 total_counts++; 4786 if (!idxs_copied) { 4787 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4788 idxs_copied = PETSC_TRUE; 4789 } 4790 } 4791 } 4792 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4793 valid_constraints = temp_constraints; 4794 if (!pcbddc->use_nnsp_true && temp_constraints) { 4795 if (temp_constraints == 1) { /* just normalize the constraint */ 4796 PetscScalar norm,*ptr_to_data; 4797 4798 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4799 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4800 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 4801 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 4802 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 4803 } else { /* perform SVD */ 4804 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 4805 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4806 4807 #if defined(PETSC_MISSING_LAPACK_GESVD) 4808 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 4809 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 4810 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 4811 the constraints basis will differ (by a complex factor with absolute value equal to 1) 4812 from that computed using LAPACKgesvd 4813 -> This is due to a different computation of eigenvectors in LAPACKheev 4814 -> The quality of the POD-computed basis will be the same */ 4815 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 4816 /* Store upper triangular part of correlation matrix */ 4817 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4818 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4819 for (j=0;j<temp_constraints;j++) { 4820 for (k=0;k<j+1;k++) { 4821 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 4822 } 4823 } 4824 /* compute eigenvalues and eigenvectors of correlation matrix */ 4825 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4826 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 4827 #if !defined(PETSC_USE_COMPLEX) 4828 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 4829 #else 4830 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 4831 #endif 4832 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4833 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 4834 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 4835 j = 0; 4836 while (j < temp_constraints && singular_vals[j] < tol) j++; 4837 total_counts = total_counts-j; 4838 valid_constraints = temp_constraints-j; 4839 /* scale and copy POD basis into used quadrature memory */ 4840 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4841 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4842 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 4843 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4844 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 4845 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4846 if (j<temp_constraints) { 4847 PetscInt ii; 4848 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 4849 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4850 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 4851 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4852 for (k=0;k<temp_constraints-j;k++) { 4853 for (ii=0;ii<size_of_constraint;ii++) { 4854 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 4855 } 4856 } 4857 } 4858 #else /* on missing GESVD */ 4859 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4860 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4861 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4862 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4863 #if !defined(PETSC_USE_COMPLEX) 4864 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4865 #else 4866 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4867 #endif 4868 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4869 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4870 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4871 k = temp_constraints; 4872 if (k > size_of_constraint) k = size_of_constraint; 4873 j = 0; 4874 while (j < k && singular_vals[k-j-1] < tol) j++; 4875 valid_constraints = k-j; 4876 total_counts = total_counts-temp_constraints+valid_constraints; 4877 #endif /* on missing GESVD */ 4878 } 4879 } 4880 /* update pointers information */ 4881 if (valid_constraints) { 4882 constraints_n[total_counts_cc] = valid_constraints; 4883 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4884 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4885 /* set change_of_basis flag */ 4886 if (boolforchange) { 4887 PetscBTSet(change_basis,total_counts_cc); 4888 } 4889 total_counts_cc++; 4890 } 4891 } 4892 /* free workspace */ 4893 if (!skip_lapack) { 4894 ierr = PetscFree(work);CHKERRQ(ierr); 4895 #if defined(PETSC_USE_COMPLEX) 4896 ierr = PetscFree(rwork);CHKERRQ(ierr); 4897 #endif 4898 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4899 #if defined(PETSC_MISSING_LAPACK_GESVD) 4900 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4901 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4902 #endif 4903 } 4904 for (k=0;k<nnsp_size;k++) { 4905 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4906 } 4907 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4908 /* free index sets of faces, edges and vertices */ 4909 for (i=0;i<n_ISForFaces;i++) { 4910 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4911 } 4912 if (n_ISForFaces) { 4913 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4914 } 4915 for (i=0;i<n_ISForEdges;i++) { 4916 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4917 } 4918 if (n_ISForEdges) { 4919 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4920 } 4921 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4922 } else { 4923 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4924 4925 total_counts = 0; 4926 n_vertices = 0; 4927 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4928 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4929 } 4930 max_constraints = 0; 4931 total_counts_cc = 0; 4932 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4933 total_counts += pcbddc->adaptive_constraints_n[i]; 4934 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4935 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4936 } 4937 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4938 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4939 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4940 constraints_data = pcbddc->adaptive_constraints_data; 4941 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4942 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4943 total_counts_cc = 0; 4944 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4945 if (pcbddc->adaptive_constraints_n[i]) { 4946 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4947 } 4948 } 4949 #if 0 4950 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4951 for (i=0;i<total_counts_cc;i++) { 4952 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4953 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4954 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4955 printf(" %d",constraints_idxs[j]); 4956 } 4957 printf("\n"); 4958 printf("number of cc: %d\n",constraints_n[i]); 4959 } 4960 for (i=0;i<n_vertices;i++) { 4961 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4962 } 4963 for (i=0;i<sub_schurs->n_subs;i++) { 4964 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4965 } 4966 #endif 4967 4968 max_size_of_constraint = 0; 4969 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4970 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4971 /* Change of basis */ 4972 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4973 if (pcbddc->use_change_of_basis) { 4974 for (i=0;i<sub_schurs->n_subs;i++) { 4975 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4976 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4977 } 4978 } 4979 } 4980 } 4981 pcbddc->local_primal_size = total_counts; 4982 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4983 4984 /* map constraints_idxs in boundary numbering */ 4985 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4986 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4987 4988 /* Create constraint matrix */ 4989 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4990 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4991 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4992 4993 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4994 /* determine if a QR strategy is needed for change of basis */ 4995 qr_needed = PETSC_FALSE; 4996 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4997 total_primal_vertices=0; 4998 pcbddc->local_primal_size_cc = 0; 4999 for (i=0;i<total_counts_cc;i++) { 5000 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5001 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5002 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5003 pcbddc->local_primal_size_cc += 1; 5004 } else if (PetscBTLookup(change_basis,i)) { 5005 for (k=0;k<constraints_n[i];k++) { 5006 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5007 } 5008 pcbddc->local_primal_size_cc += constraints_n[i]; 5009 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5010 PetscBTSet(qr_needed_idx,i); 5011 qr_needed = PETSC_TRUE; 5012 } 5013 } else { 5014 pcbddc->local_primal_size_cc += 1; 5015 } 5016 } 5017 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5018 pcbddc->n_vertices = total_primal_vertices; 5019 /* permute indices in order to have a sorted set of vertices */ 5020 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5021 5022 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5023 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5024 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5025 5026 /* nonzero structure of constraint matrix */ 5027 /* and get reference dof for local constraints */ 5028 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5029 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5030 5031 j = total_primal_vertices; 5032 total_counts = total_primal_vertices; 5033 cum = total_primal_vertices; 5034 for (i=n_vertices;i<total_counts_cc;i++) { 5035 if (!PetscBTLookup(change_basis,i)) { 5036 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5037 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5038 cum++; 5039 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5040 for (k=0;k<constraints_n[i];k++) { 5041 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5042 nnz[j+k] = size_of_constraint; 5043 } 5044 j += constraints_n[i]; 5045 } 5046 } 5047 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5048 ierr = PetscFree(nnz);CHKERRQ(ierr); 5049 5050 /* set values in constraint matrix */ 5051 for (i=0;i<total_primal_vertices;i++) { 5052 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5053 } 5054 total_counts = total_primal_vertices; 5055 for (i=n_vertices;i<total_counts_cc;i++) { 5056 if (!PetscBTLookup(change_basis,i)) { 5057 PetscInt *cols; 5058 5059 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5060 cols = constraints_idxs+constraints_idxs_ptr[i]; 5061 for (k=0;k<constraints_n[i];k++) { 5062 PetscInt row = total_counts+k; 5063 PetscScalar *vals; 5064 5065 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5066 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5067 } 5068 total_counts += constraints_n[i]; 5069 } 5070 } 5071 /* assembling */ 5072 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5073 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5074 5075 /* 5076 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5077 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5078 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5079 */ 5080 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5081 if (pcbddc->use_change_of_basis) { 5082 /* dual and primal dofs on a single cc */ 5083 PetscInt dual_dofs,primal_dofs; 5084 /* working stuff for GEQRF */ 5085 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5086 PetscBLASInt lqr_work; 5087 /* working stuff for UNGQR */ 5088 PetscScalar *gqr_work,lgqr_work_t; 5089 PetscBLASInt lgqr_work; 5090 /* working stuff for TRTRS */ 5091 PetscScalar *trs_rhs; 5092 PetscBLASInt Blas_NRHS; 5093 /* pointers for values insertion into change of basis matrix */ 5094 PetscInt *start_rows,*start_cols; 5095 PetscScalar *start_vals; 5096 /* working stuff for values insertion */ 5097 PetscBT is_primal; 5098 PetscInt *aux_primal_numbering_B; 5099 /* matrix sizes */ 5100 PetscInt global_size,local_size; 5101 /* temporary change of basis */ 5102 Mat localChangeOfBasisMatrix; 5103 /* extra space for debugging */ 5104 PetscScalar *dbg_work; 5105 5106 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5107 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5108 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5109 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5110 /* nonzeros for local mat */ 5111 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5112 if (!pcbddc->benign_change || pcbddc->fake_change) { 5113 for (i=0;i<pcis->n;i++) nnz[i]=1; 5114 } else { 5115 const PetscInt *ii; 5116 PetscInt n; 5117 PetscBool flg_row; 5118 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5119 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5120 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5121 } 5122 for (i=n_vertices;i<total_counts_cc;i++) { 5123 if (PetscBTLookup(change_basis,i)) { 5124 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5125 if (PetscBTLookup(qr_needed_idx,i)) { 5126 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5127 } else { 5128 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5129 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5130 } 5131 } 5132 } 5133 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5134 ierr = PetscFree(nnz);CHKERRQ(ierr); 5135 /* Set interior change in the matrix */ 5136 if (!pcbddc->benign_change || pcbddc->fake_change) { 5137 for (i=0;i<pcis->n;i++) { 5138 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5139 } 5140 } else { 5141 const PetscInt *ii,*jj; 5142 PetscScalar *aa; 5143 PetscInt n; 5144 PetscBool flg_row; 5145 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5146 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5147 for (i=0;i<n;i++) { 5148 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5149 } 5150 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5151 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5152 } 5153 5154 if (pcbddc->dbg_flag) { 5155 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5156 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5157 } 5158 5159 5160 /* Now we loop on the constraints which need a change of basis */ 5161 /* 5162 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5163 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5164 5165 Basic blocks of change of basis matrix T computed by 5166 5167 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5168 5169 | 1 0 ... 0 s_1/S | 5170 | 0 1 ... 0 s_2/S | 5171 | ... | 5172 | 0 ... 1 s_{n-1}/S | 5173 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5174 5175 with S = \sum_{i=1}^n s_i^2 5176 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5177 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5178 5179 - QR decomposition of constraints otherwise 5180 */ 5181 if (qr_needed) { 5182 /* space to store Q */ 5183 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5184 /* array to store scaling factors for reflectors */ 5185 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5186 /* first we issue queries for optimal work */ 5187 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5188 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5189 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5190 lqr_work = -1; 5191 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5192 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5193 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5194 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5195 lgqr_work = -1; 5196 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5197 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5198 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5199 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5200 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5201 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5202 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5203 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5204 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5205 /* array to store rhs and solution of triangular solver */ 5206 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5207 /* allocating workspace for check */ 5208 if (pcbddc->dbg_flag) { 5209 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5210 } 5211 } 5212 /* array to store whether a node is primal or not */ 5213 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5214 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5215 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5216 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5217 for (i=0;i<total_primal_vertices;i++) { 5218 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5219 } 5220 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5221 5222 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5223 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5224 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5225 if (PetscBTLookup(change_basis,total_counts)) { 5226 /* get constraint info */ 5227 primal_dofs = constraints_n[total_counts]; 5228 dual_dofs = size_of_constraint-primal_dofs; 5229 5230 if (pcbddc->dbg_flag) { 5231 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5232 } 5233 5234 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5235 5236 /* copy quadrature constraints for change of basis check */ 5237 if (pcbddc->dbg_flag) { 5238 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5239 } 5240 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5241 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5242 5243 /* compute QR decomposition of constraints */ 5244 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5245 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5246 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5247 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5248 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5249 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5250 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5251 5252 /* explictly compute R^-T */ 5253 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5254 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5255 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5256 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5257 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5258 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5259 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5260 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5261 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5262 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5263 5264 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5265 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5267 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5268 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5269 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5270 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5271 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5272 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5273 5274 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5275 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5276 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5277 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5278 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5279 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5280 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5281 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5282 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5283 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5284 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5285 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5286 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5287 5288 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5289 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5290 /* insert cols for primal dofs */ 5291 for (j=0;j<primal_dofs;j++) { 5292 start_vals = &qr_basis[j*size_of_constraint]; 5293 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5294 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5295 } 5296 /* insert cols for dual dofs */ 5297 for (j=0,k=0;j<dual_dofs;k++) { 5298 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5299 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5300 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5301 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5302 j++; 5303 } 5304 } 5305 5306 /* check change of basis */ 5307 if (pcbddc->dbg_flag) { 5308 PetscInt ii,jj; 5309 PetscBool valid_qr=PETSC_TRUE; 5310 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5311 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5312 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5313 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5314 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5315 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5316 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5317 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5318 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5319 for (jj=0;jj<size_of_constraint;jj++) { 5320 for (ii=0;ii<primal_dofs;ii++) { 5321 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5322 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5323 } 5324 } 5325 if (!valid_qr) { 5326 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5327 for (jj=0;jj<size_of_constraint;jj++) { 5328 for (ii=0;ii<primal_dofs;ii++) { 5329 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5330 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5331 } 5332 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5333 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5334 } 5335 } 5336 } 5337 } else { 5338 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5339 } 5340 } 5341 } else { /* simple transformation block */ 5342 PetscInt row,col; 5343 PetscScalar val,norm; 5344 5345 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5346 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5347 for (j=0;j<size_of_constraint;j++) { 5348 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5349 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5350 if (!PetscBTLookup(is_primal,row_B)) { 5351 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5352 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5353 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5354 } else { 5355 for (k=0;k<size_of_constraint;k++) { 5356 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5357 if (row != col) { 5358 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5359 } else { 5360 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5361 } 5362 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5363 } 5364 } 5365 } 5366 if (pcbddc->dbg_flag) { 5367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5368 } 5369 } 5370 } else { 5371 if (pcbddc->dbg_flag) { 5372 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5373 } 5374 } 5375 } 5376 5377 /* free workspace */ 5378 if (qr_needed) { 5379 if (pcbddc->dbg_flag) { 5380 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5381 } 5382 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5383 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5384 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5385 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5386 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5387 } 5388 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5389 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5390 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5391 5392 /* assembling of global change of variable */ 5393 if (!pcbddc->fake_change) { 5394 Mat tmat; 5395 PetscInt bs; 5396 5397 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5398 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5399 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5400 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5401 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5402 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5403 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5404 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5405 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5406 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5407 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5408 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5409 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5410 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5411 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5412 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5413 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5414 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5415 5416 /* check */ 5417 if (pcbddc->dbg_flag) { 5418 PetscReal error; 5419 Vec x,x_change; 5420 5421 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5422 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5423 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5424 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5425 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5426 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5427 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5428 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5429 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5430 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5431 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5432 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5433 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5434 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5435 ierr = VecDestroy(&x);CHKERRQ(ierr); 5436 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5437 } 5438 /* adapt sub_schurs computed (if any) */ 5439 if (pcbddc->use_deluxe_scaling) { 5440 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5441 5442 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5443 if (sub_schurs && sub_schurs->S_Ej_all) { 5444 Mat S_new,tmat; 5445 IS is_all_N,is_V_Sall = NULL; 5446 5447 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5448 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5449 if (pcbddc->deluxe_zerorows) { 5450 ISLocalToGlobalMapping NtoSall; 5451 IS is_V; 5452 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5453 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5454 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5455 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5456 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5457 } 5458 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5459 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5460 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5461 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5462 if (pcbddc->deluxe_zerorows) { 5463 const PetscScalar *array; 5464 const PetscInt *idxs_V,*idxs_all; 5465 PetscInt i,n_V; 5466 5467 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5468 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5469 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5470 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5471 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5472 for (i=0;i<n_V;i++) { 5473 PetscScalar val; 5474 PetscInt idx; 5475 5476 idx = idxs_V[i]; 5477 val = array[idxs_all[idxs_V[i]]]; 5478 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5479 } 5480 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5481 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5482 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5483 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5484 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5485 } 5486 sub_schurs->S_Ej_all = S_new; 5487 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5488 if (sub_schurs->sum_S_Ej_all) { 5489 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5490 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5491 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5492 if (pcbddc->deluxe_zerorows) { 5493 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5494 } 5495 sub_schurs->sum_S_Ej_all = S_new; 5496 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5497 } 5498 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5499 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5500 } 5501 /* destroy any change of basis context in sub_schurs */ 5502 if (sub_schurs && sub_schurs->change) { 5503 PetscInt i; 5504 5505 for (i=0;i<sub_schurs->n_subs;i++) { 5506 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5507 } 5508 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5509 } 5510 } 5511 if (pcbddc->switch_static) { /* need to save the local change */ 5512 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5513 } else { 5514 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5515 } 5516 /* determine if any process has changed the pressures locally */ 5517 pcbddc->change_interior = pcbddc->benign_have_null; 5518 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5519 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5520 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5521 pcbddc->use_qr_single = qr_needed; 5522 } 5523 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5524 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5525 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5526 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5527 } else { 5528 Mat benign_global = NULL; 5529 if (pcbddc->benign_have_null) { 5530 Mat tmat; 5531 5532 pcbddc->change_interior = PETSC_TRUE; 5533 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5534 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5535 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5536 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5537 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5538 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5539 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5540 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5541 if (pcbddc->benign_change) { 5542 Mat M; 5543 5544 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5545 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5546 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5547 ierr = MatDestroy(&M);CHKERRQ(ierr); 5548 } else { 5549 Mat eye; 5550 PetscScalar *array; 5551 5552 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5553 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5554 for (i=0;i<pcis->n;i++) { 5555 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5556 } 5557 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5558 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5559 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5560 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5561 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5562 } 5563 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5564 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5565 } 5566 if (pcbddc->user_ChangeOfBasisMatrix) { 5567 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5568 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5569 } else if (pcbddc->benign_have_null) { 5570 pcbddc->ChangeOfBasisMatrix = benign_global; 5571 } 5572 } 5573 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5574 IS is_global; 5575 const PetscInt *gidxs; 5576 5577 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5578 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5579 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5580 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5581 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5582 } 5583 } 5584 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5585 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5586 } 5587 5588 if (!pcbddc->fake_change) { 5589 /* add pressure dofs to set of primal nodes for numbering purposes */ 5590 for (i=0;i<pcbddc->benign_n;i++) { 5591 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5592 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5593 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5594 pcbddc->local_primal_size_cc++; 5595 pcbddc->local_primal_size++; 5596 } 5597 5598 /* check if a new primal space has been introduced (also take into account benign trick) */ 5599 pcbddc->new_primal_space_local = PETSC_TRUE; 5600 if (olocal_primal_size == pcbddc->local_primal_size) { 5601 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5602 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5603 if (!pcbddc->new_primal_space_local) { 5604 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5605 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5606 } 5607 } 5608 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5609 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5610 } 5611 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5612 5613 /* flush dbg viewer */ 5614 if (pcbddc->dbg_flag) { 5615 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5616 } 5617 5618 /* free workspace */ 5619 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5620 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5621 if (!pcbddc->adaptive_selection) { 5622 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5623 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5624 } else { 5625 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5626 pcbddc->adaptive_constraints_idxs_ptr, 5627 pcbddc->adaptive_constraints_data_ptr, 5628 pcbddc->adaptive_constraints_idxs, 5629 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5630 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5631 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5632 } 5633 PetscFunctionReturn(0); 5634 } 5635 5636 #undef __FUNCT__ 5637 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5638 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5639 { 5640 ISLocalToGlobalMapping map; 5641 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5642 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5643 PetscInt ierr,i,N; 5644 5645 PetscFunctionBegin; 5646 if (pcbddc->graphanalyzed && !pcbddc->recompute_topography) PetscFunctionReturn(0); 5647 /* Reset previously computed graph */ 5648 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5649 /* Init local Graph struct */ 5650 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5651 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5652 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5653 5654 /* Check validity of the csr graph passed in by the user */ 5655 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5656 5657 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5658 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5659 PetscInt *xadj,*adjncy; 5660 PetscInt nvtxs; 5661 PetscBool flg_row=PETSC_FALSE; 5662 5663 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5664 if (flg_row) { 5665 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5666 pcbddc->computed_rowadj = PETSC_TRUE; 5667 } 5668 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5669 } 5670 if (pcbddc->dbg_flag) { 5671 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5672 } 5673 5674 /* Setup of Graph */ 5675 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5676 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5677 5678 /* attach info on disconnected subdomains if present */ 5679 if (pcbddc->n_local_subs) { 5680 PetscInt *local_subs; 5681 5682 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5683 for (i=0;i<pcbddc->n_local_subs;i++) { 5684 const PetscInt *idxs; 5685 PetscInt nl,j; 5686 5687 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5688 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5689 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5690 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5691 } 5692 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5693 pcbddc->mat_graph->local_subs = local_subs; 5694 } 5695 5696 /* Graph's connected components analysis */ 5697 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5698 5699 /* set flag indicating analysis has been done */ 5700 pcbddc->graphanalyzed = PETSC_TRUE; 5701 PetscFunctionReturn(0); 5702 } 5703 5704 #undef __FUNCT__ 5705 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5706 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5707 { 5708 PetscInt i,j; 5709 PetscScalar *alphas; 5710 PetscErrorCode ierr; 5711 5712 PetscFunctionBegin; 5713 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5714 for (i=0;i<n;i++) { 5715 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5716 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5717 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5718 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5719 } 5720 ierr = PetscFree(alphas);CHKERRQ(ierr); 5721 PetscFunctionReturn(0); 5722 } 5723 5724 #undef __FUNCT__ 5725 #define __FUNCT__ "MatISGetSubassemblingPattern" 5726 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5727 { 5728 Mat A; 5729 PetscInt n_neighs,*neighs,*n_shared,**shared; 5730 PetscMPIInt size,rank,color; 5731 PetscInt *xadj,*adjncy; 5732 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5733 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5734 PetscInt void_procs,*procs_candidates = NULL; 5735 PetscInt xadj_count, *count; 5736 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5737 PetscSubcomm psubcomm; 5738 MPI_Comm subcomm; 5739 PetscErrorCode ierr; 5740 5741 PetscFunctionBegin; 5742 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5743 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5744 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5745 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5746 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5747 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5748 5749 if (have_void) *have_void = PETSC_FALSE; 5750 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5751 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5752 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5753 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5754 im_active = !!(n); 5755 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5756 void_procs = size - active_procs; 5757 /* get ranks of of non-active processes in mat communicator */ 5758 if (void_procs) { 5759 PetscInt ncand; 5760 5761 if (have_void) *have_void = PETSC_TRUE; 5762 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5763 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5764 for (i=0,ncand=0;i<size;i++) { 5765 if (!procs_candidates[i]) { 5766 procs_candidates[ncand++] = i; 5767 } 5768 } 5769 /* force n_subdomains to be not greater that the number of non-active processes */ 5770 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5771 } 5772 5773 /* number of subdomains requested greater than active processes -> just shift the matrix 5774 number of subdomains requested 1 -> send to master or first candidate in voids */ 5775 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5776 PetscInt issize,isidx,dest; 5777 if (*n_subdomains == 1) dest = 0; 5778 else dest = rank; 5779 if (im_active) { 5780 issize = 1; 5781 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5782 isidx = procs_candidates[dest]; 5783 } else { 5784 isidx = dest; 5785 } 5786 } else { 5787 issize = 0; 5788 isidx = -1; 5789 } 5790 *n_subdomains = active_procs; 5791 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5792 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5793 PetscFunctionReturn(0); 5794 } 5795 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5796 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5797 threshold = PetscMax(threshold,2); 5798 5799 /* Get info on mapping */ 5800 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5801 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5802 5803 /* build local CSR graph of subdomains' connectivity */ 5804 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5805 xadj[0] = 0; 5806 xadj[1] = PetscMax(n_neighs-1,0); 5807 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5808 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5809 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5810 for (i=1;i<n_neighs;i++) 5811 for (j=0;j<n_shared[i];j++) 5812 count[shared[i][j]] += 1; 5813 5814 xadj_count = 0; 5815 for (i=1;i<n_neighs;i++) { 5816 for (j=0;j<n_shared[i];j++) { 5817 if (count[shared[i][j]] < threshold) { 5818 adjncy[xadj_count] = neighs[i]; 5819 adjncy_wgt[xadj_count] = n_shared[i]; 5820 xadj_count++; 5821 break; 5822 } 5823 } 5824 } 5825 xadj[1] = xadj_count; 5826 ierr = PetscFree(count);CHKERRQ(ierr); 5827 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5828 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5829 5830 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5831 5832 /* Restrict work on active processes only */ 5833 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5834 if (void_procs) { 5835 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5836 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5837 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5838 subcomm = PetscSubcommChild(psubcomm); 5839 } else { 5840 psubcomm = NULL; 5841 subcomm = PetscObjectComm((PetscObject)mat); 5842 } 5843 5844 v_wgt = NULL; 5845 if (!color) { 5846 ierr = PetscFree(xadj);CHKERRQ(ierr); 5847 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5848 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5849 } else { 5850 Mat subdomain_adj; 5851 IS new_ranks,new_ranks_contig; 5852 MatPartitioning partitioner; 5853 PetscInt rstart=0,rend=0; 5854 PetscInt *is_indices,*oldranks; 5855 PetscMPIInt size; 5856 PetscBool aggregate; 5857 5858 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5859 if (void_procs) { 5860 PetscInt prank = rank; 5861 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5862 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5863 for (i=0;i<xadj[1];i++) { 5864 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5865 } 5866 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5867 } else { 5868 oldranks = NULL; 5869 } 5870 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5871 if (aggregate) { /* TODO: all this part could be made more efficient */ 5872 PetscInt lrows,row,ncols,*cols; 5873 PetscMPIInt nrank; 5874 PetscScalar *vals; 5875 5876 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5877 lrows = 0; 5878 if (nrank<redprocs) { 5879 lrows = size/redprocs; 5880 if (nrank<size%redprocs) lrows++; 5881 } 5882 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5883 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5884 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5885 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5886 row = nrank; 5887 ncols = xadj[1]-xadj[0]; 5888 cols = adjncy; 5889 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5890 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5891 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5892 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5893 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5894 ierr = PetscFree(xadj);CHKERRQ(ierr); 5895 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5896 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5897 ierr = PetscFree(vals);CHKERRQ(ierr); 5898 if (use_vwgt) { 5899 Vec v; 5900 const PetscScalar *array; 5901 PetscInt nl; 5902 5903 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5904 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5905 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5906 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5907 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5908 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5909 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5910 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5911 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5912 ierr = VecDestroy(&v);CHKERRQ(ierr); 5913 } 5914 } else { 5915 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5916 if (use_vwgt) { 5917 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5918 v_wgt[0] = local_size; 5919 } 5920 } 5921 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5922 5923 /* Partition */ 5924 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5925 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5926 if (v_wgt) { 5927 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5928 } 5929 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5930 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5931 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5932 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5933 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5934 5935 /* renumber new_ranks to avoid "holes" in new set of processors */ 5936 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5937 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5938 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5939 if (!aggregate) { 5940 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5941 #if defined(PETSC_USE_DEBUG) 5942 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5943 #endif 5944 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5945 } else if (oldranks) { 5946 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5947 } else { 5948 ranks_send_to_idx[0] = is_indices[0]; 5949 } 5950 } else { 5951 PetscInt idxs[1]; 5952 PetscMPIInt tag; 5953 MPI_Request *reqs; 5954 5955 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5956 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5957 for (i=rstart;i<rend;i++) { 5958 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5959 } 5960 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5961 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5962 ierr = PetscFree(reqs);CHKERRQ(ierr); 5963 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5964 #if defined(PETSC_USE_DEBUG) 5965 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5966 #endif 5967 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5968 } else if (oldranks) { 5969 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5970 } else { 5971 ranks_send_to_idx[0] = idxs[0]; 5972 } 5973 } 5974 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5975 /* clean up */ 5976 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5977 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5978 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5979 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5980 } 5981 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5982 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5983 5984 /* assemble parallel IS for sends */ 5985 i = 1; 5986 if (!color) i=0; 5987 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5988 PetscFunctionReturn(0); 5989 } 5990 5991 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5992 5993 #undef __FUNCT__ 5994 #define __FUNCT__ "MatISSubassemble" 5995 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5996 { 5997 Mat local_mat; 5998 IS is_sends_internal; 5999 PetscInt rows,cols,new_local_rows; 6000 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6001 PetscBool ismatis,isdense,newisdense,destroy_mat; 6002 ISLocalToGlobalMapping l2gmap; 6003 PetscInt* l2gmap_indices; 6004 const PetscInt* is_indices; 6005 MatType new_local_type; 6006 /* buffers */ 6007 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6008 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6009 PetscInt *recv_buffer_idxs_local; 6010 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6011 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6012 /* MPI */ 6013 MPI_Comm comm,comm_n; 6014 PetscSubcomm subcomm; 6015 PetscMPIInt n_sends,n_recvs,commsize; 6016 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6017 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6018 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6019 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6020 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6021 PetscErrorCode ierr; 6022 6023 PetscFunctionBegin; 6024 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6025 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6026 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6027 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6028 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6029 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6030 PetscValidLogicalCollectiveBool(mat,reuse,6); 6031 PetscValidLogicalCollectiveInt(mat,nis,8); 6032 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6033 if (nvecs) { 6034 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6035 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6036 } 6037 /* further checks */ 6038 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6039 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6040 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6041 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6042 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6043 if (reuse && *mat_n) { 6044 PetscInt mrows,mcols,mnrows,mncols; 6045 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6046 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6047 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6048 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6049 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6050 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6051 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6052 } 6053 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6054 PetscValidLogicalCollectiveInt(mat,bs,0); 6055 6056 /* prepare IS for sending if not provided */ 6057 if (!is_sends) { 6058 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6059 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6060 } else { 6061 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6062 is_sends_internal = is_sends; 6063 } 6064 6065 /* get comm */ 6066 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6067 6068 /* compute number of sends */ 6069 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6070 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6071 6072 /* compute number of receives */ 6073 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6074 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6075 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6076 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6077 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6078 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6079 ierr = PetscFree(iflags);CHKERRQ(ierr); 6080 6081 /* restrict comm if requested */ 6082 subcomm = 0; 6083 destroy_mat = PETSC_FALSE; 6084 if (restrict_comm) { 6085 PetscMPIInt color,subcommsize; 6086 6087 color = 0; 6088 if (restrict_full) { 6089 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6090 } else { 6091 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6092 } 6093 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6094 subcommsize = commsize - subcommsize; 6095 /* check if reuse has been requested */ 6096 if (reuse) { 6097 if (*mat_n) { 6098 PetscMPIInt subcommsize2; 6099 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6100 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6101 comm_n = PetscObjectComm((PetscObject)*mat_n); 6102 } else { 6103 comm_n = PETSC_COMM_SELF; 6104 } 6105 } else { /* MAT_INITIAL_MATRIX */ 6106 PetscMPIInt rank; 6107 6108 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6109 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6110 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6111 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6112 comm_n = PetscSubcommChild(subcomm); 6113 } 6114 /* flag to destroy *mat_n if not significative */ 6115 if (color) destroy_mat = PETSC_TRUE; 6116 } else { 6117 comm_n = comm; 6118 } 6119 6120 /* prepare send/receive buffers */ 6121 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6122 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6123 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6124 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6125 if (nis) { 6126 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6127 } 6128 6129 /* Get data from local matrices */ 6130 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6131 /* TODO: See below some guidelines on how to prepare the local buffers */ 6132 /* 6133 send_buffer_vals should contain the raw values of the local matrix 6134 send_buffer_idxs should contain: 6135 - MatType_PRIVATE type 6136 - PetscInt size_of_l2gmap 6137 - PetscInt global_row_indices[size_of_l2gmap] 6138 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6139 */ 6140 else { 6141 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6142 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6143 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6144 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6145 send_buffer_idxs[1] = i; 6146 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6147 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6148 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6149 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6150 for (i=0;i<n_sends;i++) { 6151 ilengths_vals[is_indices[i]] = len*len; 6152 ilengths_idxs[is_indices[i]] = len+2; 6153 } 6154 } 6155 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6156 /* additional is (if any) */ 6157 if (nis) { 6158 PetscMPIInt psum; 6159 PetscInt j; 6160 for (j=0,psum=0;j<nis;j++) { 6161 PetscInt plen; 6162 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6163 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6164 psum += len+1; /* indices + lenght */ 6165 } 6166 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6167 for (j=0,psum=0;j<nis;j++) { 6168 PetscInt plen; 6169 const PetscInt *is_array_idxs; 6170 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6171 send_buffer_idxs_is[psum] = plen; 6172 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6173 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6174 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6175 psum += plen+1; /* indices + lenght */ 6176 } 6177 for (i=0;i<n_sends;i++) { 6178 ilengths_idxs_is[is_indices[i]] = psum; 6179 } 6180 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6181 } 6182 6183 buf_size_idxs = 0; 6184 buf_size_vals = 0; 6185 buf_size_idxs_is = 0; 6186 buf_size_vecs = 0; 6187 for (i=0;i<n_recvs;i++) { 6188 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6189 buf_size_vals += (PetscInt)olengths_vals[i]; 6190 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6191 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6192 } 6193 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6194 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6195 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6196 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6197 6198 /* get new tags for clean communications */ 6199 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6200 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6201 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6202 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6203 6204 /* allocate for requests */ 6205 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6206 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6207 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6208 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6209 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6210 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6211 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6212 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6213 6214 /* communications */ 6215 ptr_idxs = recv_buffer_idxs; 6216 ptr_vals = recv_buffer_vals; 6217 ptr_idxs_is = recv_buffer_idxs_is; 6218 ptr_vecs = recv_buffer_vecs; 6219 for (i=0;i<n_recvs;i++) { 6220 source_dest = onodes[i]; 6221 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6222 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6223 ptr_idxs += olengths_idxs[i]; 6224 ptr_vals += olengths_vals[i]; 6225 if (nis) { 6226 source_dest = onodes_is[i]; 6227 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6228 ptr_idxs_is += olengths_idxs_is[i]; 6229 } 6230 if (nvecs) { 6231 source_dest = onodes[i]; 6232 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6233 ptr_vecs += olengths_idxs[i]-2; 6234 } 6235 } 6236 for (i=0;i<n_sends;i++) { 6237 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6238 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6239 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6240 if (nis) { 6241 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6242 } 6243 if (nvecs) { 6244 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6245 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6246 } 6247 } 6248 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6249 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6250 6251 /* assemble new l2g map */ 6252 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6253 ptr_idxs = recv_buffer_idxs; 6254 new_local_rows = 0; 6255 for (i=0;i<n_recvs;i++) { 6256 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6257 ptr_idxs += olengths_idxs[i]; 6258 } 6259 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6260 ptr_idxs = recv_buffer_idxs; 6261 new_local_rows = 0; 6262 for (i=0;i<n_recvs;i++) { 6263 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6264 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6265 ptr_idxs += olengths_idxs[i]; 6266 } 6267 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6268 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6269 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6270 6271 /* infer new local matrix type from received local matrices type */ 6272 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6273 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6274 if (n_recvs) { 6275 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6276 ptr_idxs = recv_buffer_idxs; 6277 for (i=0;i<n_recvs;i++) { 6278 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6279 new_local_type_private = MATAIJ_PRIVATE; 6280 break; 6281 } 6282 ptr_idxs += olengths_idxs[i]; 6283 } 6284 switch (new_local_type_private) { 6285 case MATDENSE_PRIVATE: 6286 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6287 new_local_type = MATSEQAIJ; 6288 bs = 1; 6289 } else { /* if I receive only 1 dense matrix */ 6290 new_local_type = MATSEQDENSE; 6291 bs = 1; 6292 } 6293 break; 6294 case MATAIJ_PRIVATE: 6295 new_local_type = MATSEQAIJ; 6296 bs = 1; 6297 break; 6298 case MATBAIJ_PRIVATE: 6299 new_local_type = MATSEQBAIJ; 6300 break; 6301 case MATSBAIJ_PRIVATE: 6302 new_local_type = MATSEQSBAIJ; 6303 break; 6304 default: 6305 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6306 break; 6307 } 6308 } else { /* by default, new_local_type is seqdense */ 6309 new_local_type = MATSEQDENSE; 6310 bs = 1; 6311 } 6312 6313 /* create MATIS object if needed */ 6314 if (!reuse) { 6315 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6316 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6317 } else { 6318 /* it also destroys the local matrices */ 6319 if (*mat_n) { 6320 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6321 } else { /* this is a fake object */ 6322 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6323 } 6324 } 6325 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6326 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6327 6328 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6329 6330 /* Global to local map of received indices */ 6331 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6332 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6333 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6334 6335 /* restore attributes -> type of incoming data and its size */ 6336 buf_size_idxs = 0; 6337 for (i=0;i<n_recvs;i++) { 6338 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6339 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6340 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6341 } 6342 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6343 6344 /* set preallocation */ 6345 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6346 if (!newisdense) { 6347 PetscInt *new_local_nnz=0; 6348 6349 ptr_idxs = recv_buffer_idxs_local; 6350 if (n_recvs) { 6351 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6352 } 6353 for (i=0;i<n_recvs;i++) { 6354 PetscInt j; 6355 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6356 for (j=0;j<*(ptr_idxs+1);j++) { 6357 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6358 } 6359 } else { 6360 /* TODO */ 6361 } 6362 ptr_idxs += olengths_idxs[i]; 6363 } 6364 if (new_local_nnz) { 6365 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6366 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6367 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6368 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6369 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6370 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6371 } else { 6372 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6373 } 6374 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6375 } else { 6376 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6377 } 6378 6379 /* set values */ 6380 ptr_vals = recv_buffer_vals; 6381 ptr_idxs = recv_buffer_idxs_local; 6382 for (i=0;i<n_recvs;i++) { 6383 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6384 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6385 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6386 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6387 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6388 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6389 } else { 6390 /* TODO */ 6391 } 6392 ptr_idxs += olengths_idxs[i]; 6393 ptr_vals += olengths_vals[i]; 6394 } 6395 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6396 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6397 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6398 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6399 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6400 6401 #if 0 6402 if (!restrict_comm) { /* check */ 6403 Vec lvec,rvec; 6404 PetscReal infty_error; 6405 6406 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6407 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6408 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6409 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6410 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6411 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6412 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6413 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6414 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6415 } 6416 #endif 6417 6418 /* assemble new additional is (if any) */ 6419 if (nis) { 6420 PetscInt **temp_idxs,*count_is,j,psum; 6421 6422 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6423 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6424 ptr_idxs = recv_buffer_idxs_is; 6425 psum = 0; 6426 for (i=0;i<n_recvs;i++) { 6427 for (j=0;j<nis;j++) { 6428 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6429 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6430 psum += plen; 6431 ptr_idxs += plen+1; /* shift pointer to received data */ 6432 } 6433 } 6434 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6435 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6436 for (i=1;i<nis;i++) { 6437 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6438 } 6439 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6440 ptr_idxs = recv_buffer_idxs_is; 6441 for (i=0;i<n_recvs;i++) { 6442 for (j=0;j<nis;j++) { 6443 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6444 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6445 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6446 ptr_idxs += plen+1; /* shift pointer to received data */ 6447 } 6448 } 6449 for (i=0;i<nis;i++) { 6450 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6451 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6452 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6453 } 6454 ierr = PetscFree(count_is);CHKERRQ(ierr); 6455 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6456 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6457 } 6458 /* free workspace */ 6459 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6460 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6461 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6462 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6463 if (isdense) { 6464 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6465 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6466 } else { 6467 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6468 } 6469 if (nis) { 6470 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6471 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6472 } 6473 6474 if (nvecs) { 6475 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6476 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6477 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6478 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6479 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6480 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6481 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6482 /* set values */ 6483 ptr_vals = recv_buffer_vecs; 6484 ptr_idxs = recv_buffer_idxs_local; 6485 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6486 for (i=0;i<n_recvs;i++) { 6487 PetscInt j; 6488 for (j=0;j<*(ptr_idxs+1);j++) { 6489 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6490 } 6491 ptr_idxs += olengths_idxs[i]; 6492 ptr_vals += olengths_idxs[i]-2; 6493 } 6494 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6495 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6496 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6497 } 6498 6499 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6500 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6501 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6502 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6503 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6504 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6505 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6506 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6507 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6508 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6509 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6510 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6511 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6512 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6513 ierr = PetscFree(onodes);CHKERRQ(ierr); 6514 if (nis) { 6515 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6516 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6517 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6518 } 6519 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6520 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6521 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6522 for (i=0;i<nis;i++) { 6523 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6524 } 6525 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6526 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6527 } 6528 *mat_n = NULL; 6529 } 6530 PetscFunctionReturn(0); 6531 } 6532 6533 /* temporary hack into ksp private data structure */ 6534 #include <petsc/private/kspimpl.h> 6535 6536 #undef __FUNCT__ 6537 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6538 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6539 { 6540 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6541 PC_IS *pcis = (PC_IS*)pc->data; 6542 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6543 Mat coarsedivudotp = NULL; 6544 MatNullSpace CoarseNullSpace = NULL; 6545 ISLocalToGlobalMapping coarse_islg; 6546 IS coarse_is,*isarray; 6547 PetscInt i,im_active=-1,active_procs=-1; 6548 PetscInt nis,nisdofs,nisneu,nisvert; 6549 PC pc_temp; 6550 PCType coarse_pc_type; 6551 KSPType coarse_ksp_type; 6552 PetscBool multilevel_requested,multilevel_allowed; 6553 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6554 Mat t_coarse_mat_is; 6555 PetscInt ncoarse; 6556 PetscBool compute_vecs = PETSC_FALSE; 6557 PetscScalar *array; 6558 MatReuse coarse_mat_reuse; 6559 PetscBool restr, full_restr, have_void; 6560 PetscErrorCode ierr; 6561 6562 PetscFunctionBegin; 6563 /* Assign global numbering to coarse dofs */ 6564 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6565 PetscInt ocoarse_size; 6566 compute_vecs = PETSC_TRUE; 6567 ocoarse_size = pcbddc->coarse_size; 6568 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6569 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6570 /* see if we can avoid some work */ 6571 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6572 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6573 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6574 PC pc; 6575 PetscBool isbddc; 6576 6577 /* temporary workaround since PCBDDC does not have a reset method so far */ 6578 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6579 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6580 if (isbddc) { 6581 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6582 } else { 6583 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6584 } 6585 coarse_reuse = PETSC_FALSE; 6586 } else { /* we can safely reuse already computed coarse matrix */ 6587 coarse_reuse = PETSC_TRUE; 6588 } 6589 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6590 coarse_reuse = PETSC_FALSE; 6591 } 6592 /* reset any subassembling information */ 6593 if (!coarse_reuse || pcbddc->recompute_topography) { 6594 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6595 } 6596 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6597 coarse_reuse = PETSC_TRUE; 6598 } 6599 /* assemble coarse matrix */ 6600 if (coarse_reuse && pcbddc->coarse_ksp) { 6601 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6602 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6603 coarse_mat_reuse = MAT_REUSE_MATRIX; 6604 } else { 6605 coarse_mat = NULL; 6606 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6607 } 6608 6609 /* creates temporary l2gmap and IS for coarse indexes */ 6610 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6611 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6612 6613 /* creates temporary MATIS object for coarse matrix */ 6614 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6615 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6616 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6617 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6618 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6619 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6620 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6621 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6622 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6623 6624 /* count "active" (i.e. with positive local size) and "void" processes */ 6625 im_active = !!(pcis->n); 6626 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6627 6628 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6629 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6630 /* full_restr : just use the receivers from the subassembling pattern */ 6631 coarse_mat_is = NULL; 6632 multilevel_allowed = PETSC_FALSE; 6633 multilevel_requested = PETSC_FALSE; 6634 full_restr = PETSC_TRUE; 6635 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6636 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6637 if (multilevel_requested) { 6638 ncoarse = active_procs/pcbddc->coarsening_ratio; 6639 restr = PETSC_FALSE; 6640 full_restr = PETSC_FALSE; 6641 } else { 6642 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6643 restr = PETSC_TRUE; 6644 full_restr = PETSC_TRUE; 6645 } 6646 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6647 ncoarse = PetscMax(1,ncoarse); 6648 if (!pcbddc->coarse_subassembling) { 6649 if (pcbddc->coarsening_ratio > 1) { 6650 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6651 } else { 6652 PetscMPIInt size,rank; 6653 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6654 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6655 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6656 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6657 } 6658 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6659 PetscInt psum; 6660 PetscMPIInt size; 6661 if (pcbddc->coarse_ksp) psum = 1; 6662 else psum = 0; 6663 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6664 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6665 if (ncoarse < size) have_void = PETSC_TRUE; 6666 } 6667 /* determine if we can go multilevel */ 6668 if (multilevel_requested) { 6669 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6670 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6671 } 6672 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6673 6674 /* dump subassembling pattern */ 6675 if (pcbddc->dbg_flag && multilevel_allowed) { 6676 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6677 } 6678 6679 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6680 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6681 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6682 const PetscInt *idxs; 6683 ISLocalToGlobalMapping tmap; 6684 6685 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6686 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6687 /* allocate space for temporary storage */ 6688 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6689 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6690 /* allocate for IS array */ 6691 nisdofs = pcbddc->n_ISForDofsLocal; 6692 nisneu = !!pcbddc->NeumannBoundariesLocal; 6693 nisvert = 0; /* nisvert is not used */ 6694 nis = nisdofs + nisneu + nisvert; 6695 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6696 /* dofs splitting */ 6697 for (i=0;i<nisdofs;i++) { 6698 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6699 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6700 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6701 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6702 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6703 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6704 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6705 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6706 } 6707 /* neumann boundaries */ 6708 if (pcbddc->NeumannBoundariesLocal) { 6709 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6710 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6711 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6712 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6713 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6714 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6715 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6716 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6717 } 6718 /* free memory */ 6719 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6720 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6721 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6722 } else { 6723 nis = 0; 6724 nisdofs = 0; 6725 nisneu = 0; 6726 nisvert = 0; 6727 isarray = NULL; 6728 } 6729 /* destroy no longer needed map */ 6730 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6731 6732 /* subassemble */ 6733 if (multilevel_allowed) { 6734 Vec vp[1]; 6735 PetscInt nvecs = 0; 6736 PetscBool reuse,reuser; 6737 6738 if (coarse_mat) reuse = PETSC_TRUE; 6739 else reuse = PETSC_FALSE; 6740 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6741 vp[0] = NULL; 6742 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6743 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6744 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6745 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6746 nvecs = 1; 6747 6748 if (pcbddc->divudotp) { 6749 Mat B,loc_divudotp; 6750 Vec v,p; 6751 IS dummy; 6752 PetscInt np; 6753 6754 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6755 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6756 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6757 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6758 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6759 ierr = VecSet(p,1.);CHKERRQ(ierr); 6760 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6761 ierr = VecDestroy(&p);CHKERRQ(ierr); 6762 ierr = MatDestroy(&B);CHKERRQ(ierr); 6763 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6764 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6765 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6766 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6767 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6768 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6769 ierr = VecDestroy(&v);CHKERRQ(ierr); 6770 } 6771 } 6772 if (reuser) { 6773 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6774 } else { 6775 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6776 } 6777 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6778 PetscScalar *arraym,*arrayv; 6779 PetscInt nl; 6780 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6781 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6782 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6783 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 6784 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 6785 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 6786 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6787 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 6788 } else { 6789 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 6790 } 6791 } else { 6792 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 6793 } 6794 if (coarse_mat_is || coarse_mat) { 6795 PetscMPIInt size; 6796 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 6797 if (!multilevel_allowed) { 6798 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 6799 } else { 6800 Mat A; 6801 6802 /* if this matrix is present, it means we are not reusing the coarse matrix */ 6803 if (coarse_mat_is) { 6804 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 6805 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 6806 coarse_mat = coarse_mat_is; 6807 } 6808 /* be sure we don't have MatSeqDENSE as local mat */ 6809 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 6810 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 6811 } 6812 } 6813 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 6814 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 6815 6816 /* create local to global scatters for coarse problem */ 6817 if (compute_vecs) { 6818 PetscInt lrows; 6819 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 6820 if (coarse_mat) { 6821 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 6822 } else { 6823 lrows = 0; 6824 } 6825 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 6826 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 6827 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 6828 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6829 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6830 } 6831 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 6832 6833 /* set defaults for coarse KSP and PC */ 6834 if (multilevel_allowed) { 6835 coarse_ksp_type = KSPRICHARDSON; 6836 coarse_pc_type = PCBDDC; 6837 } else { 6838 coarse_ksp_type = KSPPREONLY; 6839 coarse_pc_type = PCREDUNDANT; 6840 } 6841 6842 /* print some info if requested */ 6843 if (pcbddc->dbg_flag) { 6844 if (!multilevel_allowed) { 6845 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6846 if (multilevel_requested) { 6847 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6848 } else if (pcbddc->max_levels) { 6849 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6850 } 6851 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6852 } 6853 } 6854 6855 /* create the coarse KSP object only once with defaults */ 6856 if (coarse_mat) { 6857 PetscViewer dbg_viewer = NULL; 6858 if (pcbddc->dbg_flag) { 6859 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6860 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6861 } 6862 if (!pcbddc->coarse_ksp) { 6863 char prefix[256],str_level[16]; 6864 size_t len; 6865 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6866 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6867 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6868 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6869 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6870 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6871 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6872 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6873 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6874 /* prefix */ 6875 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6876 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6877 if (!pcbddc->current_level) { 6878 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6879 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6880 } else { 6881 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6882 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6883 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6884 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6885 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6886 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6887 } 6888 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6889 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6890 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6891 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6892 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6893 /* allow user customization */ 6894 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6895 } 6896 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6897 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6898 if (nisdofs) { 6899 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6900 for (i=0;i<nisdofs;i++) { 6901 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6902 } 6903 } 6904 if (nisneu) { 6905 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6906 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6907 } 6908 if (nisvert) { 6909 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6910 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6911 } 6912 6913 /* get some info after set from options */ 6914 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6915 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6916 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6917 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6918 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6919 isbddc = PETSC_FALSE; 6920 } 6921 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6922 if (isredundant) { 6923 KSP inner_ksp; 6924 PC inner_pc; 6925 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6926 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6927 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6928 } 6929 6930 /* parameters which miss an API */ 6931 if (isbddc) { 6932 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6933 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6934 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6935 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6936 if (pcbddc_coarse->benign_saddle_point) { 6937 Mat coarsedivudotp_is; 6938 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6939 IS row,col; 6940 const PetscInt *gidxs; 6941 PetscInt n,st,M,N; 6942 6943 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6944 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6945 st = st-n; 6946 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6947 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6948 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6949 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6950 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6951 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6952 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6953 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6954 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6955 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6956 ierr = ISDestroy(&row);CHKERRQ(ierr); 6957 ierr = ISDestroy(&col);CHKERRQ(ierr); 6958 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6959 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6960 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6961 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6962 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6963 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6964 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6965 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6966 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6967 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6968 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6969 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6970 } 6971 } 6972 6973 /* propagate symmetry info of coarse matrix */ 6974 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6975 if (pc->pmat->symmetric_set) { 6976 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6977 } 6978 if (pc->pmat->hermitian_set) { 6979 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6980 } 6981 if (pc->pmat->spd_set) { 6982 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6983 } 6984 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6985 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6986 } 6987 /* set operators */ 6988 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6989 if (pcbddc->dbg_flag) { 6990 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6991 } 6992 } 6993 ierr = PetscFree(isarray);CHKERRQ(ierr); 6994 #if 0 6995 { 6996 PetscViewer viewer; 6997 char filename[256]; 6998 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6999 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7000 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7001 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7002 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7003 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7004 } 7005 #endif 7006 7007 if (pcbddc->coarse_ksp) { 7008 Vec crhs,csol; 7009 7010 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7011 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7012 if (!csol) { 7013 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7014 } 7015 if (!crhs) { 7016 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7017 } 7018 } 7019 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7020 7021 /* compute null space for coarse solver if the benign trick has been requested */ 7022 if (pcbddc->benign_null) { 7023 7024 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7025 for (i=0;i<pcbddc->benign_n;i++) { 7026 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7027 } 7028 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7029 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7030 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7031 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7032 if (coarse_mat) { 7033 Vec nullv; 7034 PetscScalar *array,*array2; 7035 PetscInt nl; 7036 7037 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7038 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7039 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7040 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7041 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7042 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7043 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7044 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7045 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7046 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7047 } 7048 } 7049 7050 if (pcbddc->coarse_ksp) { 7051 PetscBool ispreonly; 7052 7053 if (CoarseNullSpace) { 7054 PetscBool isnull; 7055 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7056 if (isnull) { 7057 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7058 } 7059 /* TODO: add local nullspaces (if any) */ 7060 } 7061 /* setup coarse ksp */ 7062 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7063 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7064 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7065 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7066 KSP check_ksp; 7067 KSPType check_ksp_type; 7068 PC check_pc; 7069 Vec check_vec,coarse_vec; 7070 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7071 PetscInt its; 7072 PetscBool compute_eigs; 7073 PetscReal *eigs_r,*eigs_c; 7074 PetscInt neigs; 7075 const char *prefix; 7076 7077 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7078 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7079 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7080 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7081 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7082 /* prevent from setup unneeded object */ 7083 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7084 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7085 if (ispreonly) { 7086 check_ksp_type = KSPPREONLY; 7087 compute_eigs = PETSC_FALSE; 7088 } else { 7089 check_ksp_type = KSPGMRES; 7090 compute_eigs = PETSC_TRUE; 7091 } 7092 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7093 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7094 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7095 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7096 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7097 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7098 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7099 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7100 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7101 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7102 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7103 /* create random vec */ 7104 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7105 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7106 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7107 /* solve coarse problem */ 7108 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7109 /* set eigenvalue estimation if preonly has not been requested */ 7110 if (compute_eigs) { 7111 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7112 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7113 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7114 if (neigs) { 7115 lambda_max = eigs_r[neigs-1]; 7116 lambda_min = eigs_r[0]; 7117 if (pcbddc->use_coarse_estimates) { 7118 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7119 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7120 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7121 } 7122 } 7123 } 7124 } 7125 7126 /* check coarse problem residual error */ 7127 if (pcbddc->dbg_flag) { 7128 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7129 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7130 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7131 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7132 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7133 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7134 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7135 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7136 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7137 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7138 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7139 if (CoarseNullSpace) { 7140 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7141 } 7142 if (compute_eigs) { 7143 PetscReal lambda_max_s,lambda_min_s; 7144 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7145 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7146 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7147 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7148 for (i=0;i<neigs;i++) { 7149 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7150 } 7151 } 7152 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7153 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7154 } 7155 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7156 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7157 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7158 if (compute_eigs) { 7159 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7160 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7161 } 7162 } 7163 } 7164 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7165 /* print additional info */ 7166 if (pcbddc->dbg_flag) { 7167 /* waits until all processes reaches this point */ 7168 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7169 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7170 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7171 } 7172 7173 /* free memory */ 7174 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7175 PetscFunctionReturn(0); 7176 } 7177 7178 #undef __FUNCT__ 7179 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7180 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7181 { 7182 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7183 PC_IS* pcis = (PC_IS*)pc->data; 7184 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7185 IS subset,subset_mult,subset_n; 7186 PetscInt local_size,coarse_size=0; 7187 PetscInt *local_primal_indices=NULL; 7188 const PetscInt *t_local_primal_indices; 7189 PetscErrorCode ierr; 7190 7191 PetscFunctionBegin; 7192 /* Compute global number of coarse dofs */ 7193 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7194 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7195 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7196 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7197 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7198 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7199 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7200 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7201 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7202 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7203 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7204 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7205 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7206 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7207 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7208 7209 /* check numbering */ 7210 if (pcbddc->dbg_flag) { 7211 PetscScalar coarsesum,*array,*array2; 7212 PetscInt i; 7213 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7214 7215 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7216 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7217 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7218 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7219 /* counter */ 7220 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7221 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7222 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7223 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7224 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7225 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7226 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7227 for (i=0;i<pcbddc->local_primal_size;i++) { 7228 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7229 } 7230 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7231 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7232 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7233 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7234 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7235 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7236 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7237 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7238 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7239 for (i=0;i<pcis->n;i++) { 7240 if (array[i] != 0.0 && array[i] != array2[i]) { 7241 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7242 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7243 set_error = PETSC_TRUE; 7244 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7245 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7246 } 7247 } 7248 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7249 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7250 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7251 for (i=0;i<pcis->n;i++) { 7252 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7253 } 7254 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7255 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7256 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7257 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7258 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7259 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7260 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7261 PetscInt *gidxs; 7262 7263 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7264 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7266 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7268 for (i=0;i<pcbddc->local_primal_size;i++) { 7269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7270 } 7271 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7272 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7273 } 7274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7275 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7276 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7277 } 7278 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7279 /* get back data */ 7280 *coarse_size_n = coarse_size; 7281 *local_primal_indices_n = local_primal_indices; 7282 PetscFunctionReturn(0); 7283 } 7284 7285 #undef __FUNCT__ 7286 #define __FUNCT__ "PCBDDCGlobalToLocal" 7287 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7288 { 7289 IS localis_t; 7290 PetscInt i,lsize,*idxs,n; 7291 PetscScalar *vals; 7292 PetscErrorCode ierr; 7293 7294 PetscFunctionBegin; 7295 /* get indices in local ordering exploiting local to global map */ 7296 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7297 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7298 for (i=0;i<lsize;i++) vals[i] = 1.0; 7299 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7300 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7301 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7302 if (idxs) { /* multilevel guard */ 7303 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7304 } 7305 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7306 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7307 ierr = PetscFree(vals);CHKERRQ(ierr); 7308 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7309 /* now compute set in local ordering */ 7310 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7311 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7312 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7313 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7314 for (i=0,lsize=0;i<n;i++) { 7315 if (PetscRealPart(vals[i]) > 0.5) { 7316 lsize++; 7317 } 7318 } 7319 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7320 for (i=0,lsize=0;i<n;i++) { 7321 if (PetscRealPart(vals[i]) > 0.5) { 7322 idxs[lsize++] = i; 7323 } 7324 } 7325 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7326 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7327 *localis = localis_t; 7328 PetscFunctionReturn(0); 7329 } 7330 7331 #undef __FUNCT__ 7332 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7333 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7334 { 7335 PC_IS *pcis=(PC_IS*)pc->data; 7336 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7337 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7338 Mat S_j; 7339 PetscInt *used_xadj,*used_adjncy; 7340 PetscBool free_used_adj; 7341 PetscErrorCode ierr; 7342 7343 PetscFunctionBegin; 7344 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7345 free_used_adj = PETSC_FALSE; 7346 if (pcbddc->sub_schurs_layers == -1) { 7347 used_xadj = NULL; 7348 used_adjncy = NULL; 7349 } else { 7350 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7351 used_xadj = pcbddc->mat_graph->xadj; 7352 used_adjncy = pcbddc->mat_graph->adjncy; 7353 } else if (pcbddc->computed_rowadj) { 7354 used_xadj = pcbddc->mat_graph->xadj; 7355 used_adjncy = pcbddc->mat_graph->adjncy; 7356 } else { 7357 PetscBool flg_row=PETSC_FALSE; 7358 const PetscInt *xadj,*adjncy; 7359 PetscInt nvtxs; 7360 7361 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7362 if (flg_row) { 7363 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7364 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7365 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7366 free_used_adj = PETSC_TRUE; 7367 } else { 7368 pcbddc->sub_schurs_layers = -1; 7369 used_xadj = NULL; 7370 used_adjncy = NULL; 7371 } 7372 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7373 } 7374 } 7375 7376 /* setup sub_schurs data */ 7377 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7378 if (!sub_schurs->schur_explicit) { 7379 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7380 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7381 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7382 } else { 7383 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7384 PetscBool isseqaij,need_change = PETSC_FALSE; 7385 PetscInt benign_n; 7386 Mat change = NULL; 7387 Vec scaling = NULL; 7388 IS change_primal = NULL; 7389 7390 if (!pcbddc->use_vertices && reuse_solvers) { 7391 PetscInt n_vertices; 7392 7393 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7394 reuse_solvers = (PetscBool)!n_vertices; 7395 } 7396 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7397 if (!isseqaij) { 7398 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7399 if (matis->A == pcbddc->local_mat) { 7400 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7401 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7402 } else { 7403 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7404 } 7405 } 7406 if (!pcbddc->benign_change_explicit) { 7407 benign_n = pcbddc->benign_n; 7408 } else { 7409 benign_n = 0; 7410 } 7411 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7412 We need a global reduction to avoid possible deadlocks. 7413 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7414 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7415 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7416 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7417 need_change = (PetscBool)(!need_change); 7418 } 7419 /* If the user defines additional constraints, we import them here. 7420 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7421 if (need_change) { 7422 PC_IS *pcisf; 7423 PC_BDDC *pcbddcf; 7424 PC pcf; 7425 7426 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7427 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7428 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7429 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7430 /* hacks */ 7431 pcisf = (PC_IS*)pcf->data; 7432 pcisf->is_B_local = pcis->is_B_local; 7433 pcisf->vec1_N = pcis->vec1_N; 7434 pcisf->BtoNmap = pcis->BtoNmap; 7435 pcisf->n = pcis->n; 7436 pcisf->n_B = pcis->n_B; 7437 pcbddcf = (PC_BDDC*)pcf->data; 7438 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7439 pcbddcf->mat_graph = pcbddc->mat_graph; 7440 pcbddcf->use_faces = PETSC_TRUE; 7441 pcbddcf->use_change_of_basis = PETSC_TRUE; 7442 pcbddcf->use_change_on_faces = PETSC_TRUE; 7443 pcbddcf->use_qr_single = PETSC_TRUE; 7444 pcbddcf->fake_change = PETSC_TRUE; 7445 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7446 /* store information on primal vertices and change of basis (in local numbering) */ 7447 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7448 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7449 change = pcbddcf->ConstraintMatrix; 7450 pcbddcf->ConstraintMatrix = NULL; 7451 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7452 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7453 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7454 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7455 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7456 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7457 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7458 pcf->ops->destroy = NULL; 7459 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7460 } 7461 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7462 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7463 ierr = MatDestroy(&change);CHKERRQ(ierr); 7464 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7465 } 7466 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7467 7468 /* free adjacency */ 7469 if (free_used_adj) { 7470 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7471 } 7472 PetscFunctionReturn(0); 7473 } 7474 7475 #undef __FUNCT__ 7476 #define __FUNCT__ "PCBDDCInitSubSchurs" 7477 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7478 { 7479 PC_IS *pcis=(PC_IS*)pc->data; 7480 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7481 PCBDDCGraph graph; 7482 PetscErrorCode ierr; 7483 7484 PetscFunctionBegin; 7485 /* attach interface graph for determining subsets */ 7486 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7487 IS verticesIS,verticescomm; 7488 PetscInt vsize,*idxs; 7489 7490 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7491 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7492 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7493 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7494 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7495 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7496 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7497 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7498 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7499 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7500 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7501 } else { 7502 graph = pcbddc->mat_graph; 7503 } 7504 /* print some info */ 7505 if (pcbddc->dbg_flag) { 7506 IS vertices; 7507 PetscInt nv,nedges,nfaces; 7508 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7509 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7510 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7511 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7512 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7513 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7514 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7515 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7516 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7517 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7518 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7519 } 7520 7521 /* sub_schurs init */ 7522 if (!pcbddc->sub_schurs) { 7523 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7524 } 7525 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7526 7527 /* free graph struct */ 7528 if (pcbddc->sub_schurs_rebuild) { 7529 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7530 } 7531 PetscFunctionReturn(0); 7532 } 7533 7534 #undef __FUNCT__ 7535 #define __FUNCT__ "PCBDDCCheckOperator" 7536 PetscErrorCode PCBDDCCheckOperator(PC pc) 7537 { 7538 PC_IS *pcis=(PC_IS*)pc->data; 7539 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7540 PetscErrorCode ierr; 7541 7542 PetscFunctionBegin; 7543 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7544 IS zerodiag = NULL; 7545 Mat S_j,B0_B=NULL; 7546 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7547 PetscScalar *p0_check,*array,*array2; 7548 PetscReal norm; 7549 PetscInt i; 7550 7551 /* B0 and B0_B */ 7552 if (zerodiag) { 7553 IS dummy; 7554 7555 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7556 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7557 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7558 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7559 } 7560 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7561 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7562 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7563 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7564 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7565 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7566 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7567 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7568 /* S_j */ 7569 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7570 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7571 7572 /* mimic vector in \widetilde{W}_\Gamma */ 7573 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7574 /* continuous in primal space */ 7575 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7576 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7577 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7578 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7579 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7580 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7581 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7582 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7583 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7584 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7585 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7586 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7587 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7588 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7589 7590 /* assemble rhs for coarse problem */ 7591 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7592 /* local with Schur */ 7593 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7594 if (zerodiag) { 7595 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7596 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7597 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7598 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7599 } 7600 /* sum on primal nodes the local contributions */ 7601 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7602 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7603 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7604 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7605 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7606 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7607 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7608 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7609 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7610 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7611 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7612 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7613 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7614 /* scale primal nodes (BDDC sums contibutions) */ 7615 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7616 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7617 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7618 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7619 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7620 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7621 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7622 /* global: \widetilde{B0}_B w_\Gamma */ 7623 if (zerodiag) { 7624 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7625 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7626 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7627 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7628 } 7629 /* BDDC */ 7630 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7631 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7632 7633 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7634 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7635 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7636 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7637 for (i=0;i<pcbddc->benign_n;i++) { 7638 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7639 } 7640 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7641 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7642 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7643 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7644 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7645 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7646 } 7647 PetscFunctionReturn(0); 7648 } 7649