1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 84 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 85 86 /* gradients */ 87 ptr = work + 5*esize; 88 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 89 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 90 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 91 ierr = MatDestroy(&GE);CHKERRQ(ierr); 92 93 /* constants */ 94 ptr += rsize*csize; 95 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 96 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 97 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 98 ierr = MatDestroy(&GE);CHKERRQ(ierr); 99 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 100 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "PCBDDCNedelecSupport" 106 PetscErrorCode PCBDDCNedelecSupport(PC pc) 107 { 108 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 109 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 110 Mat G,T,conn,lG,lGt,lGis,lGall,lGe; 111 MatNullSpace nnsp; 112 Vec tvec,*quads; 113 PetscSF sfv; 114 ISLocalToGlobalMapping el2g,vl2g; 115 MPI_Comm comm; 116 IS lned,primals; 117 IS *eedges,*extrows,*extcols; 118 PetscBT btv,bte,btvc,btb,btvcand,btvi; 119 PetscScalar *vals,*work; 120 PetscReal *rwork; 121 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 122 PetscInt ne,nv,Ne,Nv,Le,Lv,order; 123 PetscInt n_neigh,*neigh,*n_shared,**shared; 124 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 125 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 126 PetscInt *sfvleaves,*sfvroots; 127 PetscBool print,eerr,done,lrc[2],conforming,ismpiaij; 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 /* test variable order code and print debug info TODO: to be removed */ 132 print = PETSC_FALSE; 133 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 134 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 135 136 /* Return to caller if there are no edges in the decomposition */ 137 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 138 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 139 ierr = ISLocalToGlobalMappingGetSize(el2g,&ne);CHKERRQ(ierr); 140 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 141 lrc[0] = PETSC_FALSE; 142 for (i=0;i<ne;i++) { 143 if (PetscRealPart(vals[i]) > 2.) { 144 lrc[0] = PETSC_TRUE; 145 break; 146 } 147 } 148 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 149 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 150 if (!lrc[1]) PetscFunctionReturn(0); 151 152 /* Get relevant objects */ 153 G = pcbddc->discretegradient; 154 order = pcbddc->nedorder; 155 conforming = pcbddc->conforming; 156 157 /* Compute local and global sizes of egde dofs and nodal dofs */ 158 ierr = MatGetSize(G,&Ne,&Nv);CHKERRQ(ierr); 159 ierr = MatGetLocalSize(G,&Le,&Lv);CHKERRQ(ierr); 160 161 /* Sanity checks */ 162 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 163 ierr = PetscObjectTypeCompare((PetscObject)G,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 164 if (!ismpiaij) SETERRQ1(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with discrete gradient of type %s. Please use MPIAIJ",((PetscObject)G)->type_name); 165 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 166 if (order && Ne%order) SETERRQ2(comm,PETSC_ERR_USER,"The number of edge dofs %d it's not a multiple of the order %d",Ne,order); 167 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 168 ierr = MatGetSize(pc->pmat,&i,NULL);CHKERRQ(ierr); 169 if (Ne != i) SETERRQ2(comm,PETSC_ERR_USER,"Global matrix sizes should match! Number of rows of G %d differs from that of A %d",Ne,i); 170 ierr = MatGetLocalSize(pc->pmat,&i,NULL);CHKERRQ(ierr); 171 if (Le != i) SETERRQ2(comm,PETSC_ERR_USER,"Local matrix sizes should match! Number of local rows of G %d differs from that of A %d",Le,i); 172 173 /* Drop connections for interior edges (this modifies G) */ 174 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 175 ierr = VecScatterBegin(matis->rctx,matis->counter,tvec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 176 ierr = VecScatterEnd(matis->rctx,matis->counter,tvec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 177 ierr = PetscMalloc1(Le,&marks);CHKERRQ(ierr); 178 ierr = VecGetOwnershipRange(tvec,&rst,NULL);CHKERRQ(ierr); 179 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&vals);CHKERRQ(ierr); 180 cum = 0; 181 for (i=0;i<Le;i++) if (PetscRealPart(vals[i]) < 1.5) marks[cum++] = i+rst; 182 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&vals);CHKERRQ(ierr); 183 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 184 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 185 ierr = MatZeroRows(G,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 186 ierr = PetscFree(marks);CHKERRQ(ierr); 187 188 /* Extract subdomain relevant rows of G */ 189 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 190 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 191 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 193 ierr = ISDestroy(&lned);CHKERRQ(ierr); 194 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 195 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 196 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 197 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 198 if (print) { 199 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 200 ierr = MatView(lG,NULL);CHKERRQ(ierr); 201 } 202 203 /* SF for nodal communications */ 204 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 205 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 207 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 208 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 209 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 210 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 211 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 212 213 /* Destroy temporary G created in MATIS format */ 214 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 215 216 /* Analyze the edge-nodes connections (duplicate lG) */ 217 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 218 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 219 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 220 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 221 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 222 /* need to import the boundary specification to ensure the 223 proper detection of coarse edges' endpoints */ 224 if (pcbddc->DirichletBoundariesLocal) { 225 ierr = ISGetLocalSize(pcbddc->DirichletBoundariesLocal,&cum);CHKERRQ(ierr); 226 ierr = ISGetIndices(pcbddc->DirichletBoundariesLocal,&idxs);CHKERRQ(ierr); 227 for (i=0;i<cum;i++) { 228 if (idxs[i] >= 0) { 229 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 230 } 231 } 232 ierr = ISRestoreIndices(pcbddc->DirichletBoundariesLocal,&idxs);CHKERRQ(ierr); 233 } 234 if (pcbddc->NeumannBoundariesLocal) { 235 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&cum);CHKERRQ(ierr); 236 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 237 for (i=0;i<cum;i++) { 238 if (idxs[i] >= 0) { 239 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 240 } 241 } 242 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 243 } 244 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 245 /* need to remove coarse faces' dofs to ensure the 246 proper detection of coarse edges' endpoints */ 247 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 248 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 249 for (i=1;i<n_neigh;i++) { 250 PetscInt j; 251 for (j=0;j<n_shared[i];j++) marks[shared[i][j]]++; 252 } 253 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 254 cum = 0; 255 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 256 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 257 for (i=0;i<ne;i++) { 258 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 259 if (!marks[i] || (marks[i] == 1 && !PetscBTLookup(btb,i))) { 260 marks[cum++] = i; 261 continue; 262 } 263 /* set badly connected edge dofs as primal */ 264 if (!conforming && ii[i+1]-ii[i] != order + 1) { 265 marks[cum++] = i; 266 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 267 for (j=ii[i];j<ii[i+1];j++) { 268 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 269 } 270 } 271 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 272 if (!order && ii[i+1] != ii[i]) { 273 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 274 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 275 } 276 } 277 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 278 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 279 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 280 /* identify splitpoints and corner candidates: TODO variable order */ 281 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 282 if (print) { 283 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 284 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 285 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 286 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 287 } 288 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 289 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 290 for (i=0;i<nv;i++) { 291 PetscInt ord = order, test = ii[i+1]-ii[i]; 292 if (!order) { 293 PetscReal vorder = 0.; 294 295 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 296 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 297 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 298 ord = 1; 299 } 300 #if defined(PETSC_USE_DEBUG) 301 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 302 #endif 303 if (test >= 3*ord) { /* splitpoints */ 304 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 305 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 306 } else if (test == ord) { 307 if (order == 1) { 308 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 309 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 310 } else { 311 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 312 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 313 } 314 } 315 } 316 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 317 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 318 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 319 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 320 321 /* Get the local G^T explicitly */ 322 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 323 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 324 if (print) { 325 ierr = PetscObjectSetName((PetscObject)lGt,"initial_lGt");CHKERRQ(ierr); 326 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 327 } 328 329 /* Mark interior nodal dofs */ 330 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 331 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 332 for (i=1;i<n_neigh;i++) { 333 for (j=0;j<n_shared[i];j++) { 334 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 335 } 336 } 337 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 338 339 /* communicate corners and splitpoints */ 340 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 341 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 342 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 343 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 344 345 if (print) { 346 IS tbz; 347 348 cum = 0; 349 for (i=0;i<nv;i++) 350 if (sfvleaves[i]) 351 vmarks[cum++] = i; 352 353 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 354 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 355 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 356 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 357 } 358 359 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 360 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 361 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 362 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 363 364 /* Zero rows of lGt corresponding to identified corners 365 and interior nodal dofs */ 366 cum = 0; 367 for (i=0;i<nv;i++) { 368 if (sfvleaves[i]) { 369 vmarks[cum++] = i; 370 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 371 } 372 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 373 } 374 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 375 if (print) { 376 IS tbz; 377 378 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 379 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 380 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 381 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 382 } 383 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 384 ierr = PetscFree(vmarks);CHKERRQ(ierr); 385 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 386 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 387 388 /* Recompute G */ 389 ierr = MatDestroy(&lG);CHKERRQ(ierr); 390 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 391 if (print) { 392 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 393 ierr = MatView(lG,NULL);CHKERRQ(ierr); 394 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 395 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 396 } 397 398 /* Get primal dofs (if any) */ 399 cum = 0; 400 for (i=0;i<ne;i++) { 401 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 402 } 403 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 404 if (print) { 405 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 406 ierr = ISView(primals,NULL);CHKERRQ(ierr); 407 } 408 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 409 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 410 ierr = ISDestroy(&primals);CHKERRQ(ierr); 411 412 /* Compute edge connectivity */ 413 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 414 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 415 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 416 ierr = PCBDDCSetLocalAdjacencyGraph(pc,ne,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 417 418 /* Analyze interface for edge dofs */ 419 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 420 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 421 422 /* Get coarse edges in the edge space */ 423 pcbddc->mat_graph->twodim = PETSC_FALSE; 424 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 425 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 426 427 /* Mark fine edge dofs with their coarse edge id */ 428 maxsize = 0; 429 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 430 for (i=0;i<nee;i++) { 431 PetscInt size,mark = i+1; 432 433 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 434 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 435 for (j=0;j<size;j++) marks[idxs[j]] = mark; 436 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 437 maxsize = PetscMax(maxsize,size); 438 } 439 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 440 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 441 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 442 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 443 if (print) { 444 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 445 ierr = ISView(primals,NULL);CHKERRQ(ierr); 446 } 447 448 /* Find coarse edge endpoints */ 449 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 450 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 451 for (i=0;i<nee;i++) { 452 PetscInt mark = i+1,size; 453 454 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 455 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 456 if (print) { 457 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 458 ISView(eedges[i],NULL); 459 } 460 for (j=0;j<size;j++) { 461 PetscInt k, ee = idxs[j]; 462 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 463 for (k=ii[ee];k<ii[ee+1];k++) { 464 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 465 if (PetscBTLookup(btv,jj[k])) { 466 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 467 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 468 if (print) PetscPrintf(PETSC_COMM_SELF," candidate? %d\n",(PetscBool)(PetscBTLookup(btvcand,jj[k]))); 469 PetscInt k2; 470 PetscBool corner = PETSC_FALSE; 471 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 472 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: count %d, mark %d (ref mark %d), special %d\n",jjt[k2],pcbddc->mat_graph->count[jjt[k2]],marks[jjt[k2]],mark,pcbddc->mat_graph->special_dof[jjt[k2]]); 473 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || 474 pcbddc->mat_graph->special_dof[jjt[k2]] == PCBDDCGRAPH_DIRICHLET_MARK) { 475 corner = PETSC_TRUE; 476 break; 477 } 478 } 479 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 480 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 481 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 482 } else { 483 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 484 } 485 } 486 } 487 } 488 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 489 } 490 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 491 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 492 493 /* Reset marked primal dofs */ 494 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 495 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 496 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 497 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 498 499 /* Compute extended cols indices */ 500 ierr = MatGetRowIJ(lG ,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 501 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 502 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 503 i *= maxsize; 504 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 505 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 506 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 507 eerr = PETSC_FALSE; 508 for (i=0;i<nee;i++) { 509 PetscInt size; 510 511 cum = 0; 512 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 513 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 514 for (j=0;j<size;j++) { 515 PetscInt k,ee = idxs[j]; 516 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 517 } 518 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 519 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 520 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 521 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 522 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 523 /* it may happen that endpoints are not defined at this point 524 if it is the case, mark this edge for a second pass */ 525 if (cum != size -1) { 526 emarks[i] = 1; 527 if (print) { 528 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 529 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 530 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 531 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 532 } 533 eerr = PETSC_TRUE; 534 } 535 } 536 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 537 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 538 if (done) { 539 PetscInt *newprimals; 540 541 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 542 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 543 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 544 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 545 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 546 for (i=0;i<nee;i++) { 547 if (emarks[i]) { 548 PetscInt size,mark = i+1; 549 550 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 551 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 552 for (j=0;j<size;j++) { 553 /* newprimals[cum++] = idxs[j]; */ 554 PetscInt k,ee = idxs[j]; 555 for (k=ii[ee];k<ii[ee+1];k++) { 556 /* set all candidates located on the edge as corners */ 557 if (PetscBTLookup(btvcand,jj[k])) { 558 PetscInt k2,vv = jj[k]; 559 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 560 /* set all edge dofs connected to candidate as primals */ 561 for (k2=iit[vv];k2<iit[vv+1];k2++) { 562 if (marks[jjt[k2]] == mark) { 563 PetscInt k3,ee2 = jjt[k2]; 564 newprimals[cum++] = ee2; 565 /* finally set the new corners */ 566 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 567 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 568 } 569 } 570 } 571 } 572 } 573 } 574 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 575 } 576 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 577 } 578 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 579 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 580 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 581 ierr = PetscFree(newprimals);CHKERRQ(ierr); 582 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 583 ierr = ISDestroy(&primals);CHKERRQ(ierr); 584 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 585 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 586 587 /* Mark again */ 588 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 589 for (i=0;i<nee;i++) { 590 PetscInt size,mark = i+1; 591 592 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 593 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 594 for (j=0;j<size;j++) marks[idxs[j]] = mark; 595 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 596 } 597 if (print) { 598 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 599 ierr = ISView(primals,NULL);CHKERRQ(ierr); 600 } 601 602 /* Recompute extended cols */ 603 eerr = PETSC_FALSE; 604 for (i=0;i<nee;i++) { 605 PetscInt size; 606 607 cum = 0; 608 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 609 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 610 for (j=0;j<size;j++) { 611 PetscInt k,ee = idxs[j]; 612 for (k=ii[ee];k<ii[ee+1];k++) { 613 if (!PetscBTLookup(btv,jj[k])) { 614 extrow[cum++] = jj[k]; 615 } 616 } 617 } 618 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 619 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 620 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 621 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 622 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 623 if (cum != size -1) { 624 if (print) { 625 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 626 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 627 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 628 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 629 } 630 eerr = PETSC_TRUE; 631 } 632 } 633 } 634 ierr = MatRestoreRowIJ( lG,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 635 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 636 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 637 ierr = PetscFree(emarks);CHKERRQ(ierr); 638 /* an error should not occur at this point */ 639 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 640 641 /* Check the number of endpoints */ 642 /* TODO: add case for circular edge */ 643 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 644 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 645 for (i=0;i<nee;i++) { 646 PetscInt size, found = 0; 647 648 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 649 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 650 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 651 for (j=0;j<size;j++) { 652 PetscInt k,ee = idxs[j]; 653 for (k=ii[ee];k<ii[ee+1];k++) { 654 PetscInt vv = jj[k]; 655 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 656 found++; 657 } 658 } 659 } 660 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 661 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 662 } 663 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 664 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 665 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 666 667 #if defined(PETSC_USE_DEBUG) 668 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 669 not interfere with neighbouring coarse edges */ 670 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 671 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 672 for (i=0;i<nv;i++) { 673 PetscInt emax = 0,eemax = 0; 674 675 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 676 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 677 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 678 for (j=1;j<nee+1;j++) { 679 if (emax < emarks[j]) { 680 emax = emarks[j]; 681 eemax = j; 682 } 683 } 684 /* not relevant for edges */ 685 if (!eemax) continue; 686 687 for (j=ii[i];j<ii[i+1];j++) { 688 if (marks[jj[j]] && marks[jj[j]] != eemax) { 689 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dod %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 690 } 691 } 692 } 693 ierr = PetscFree(emarks);CHKERRQ(ierr); 694 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 695 #endif 696 697 /* Compute extended rows indices for edge blocks of the change of basis */ 698 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 699 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 700 extmem *= maxsize; 701 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 702 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 703 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 704 for (i=0;i<nv;i++) { 705 PetscInt mark = 0,size,start; 706 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 707 for (j=ii[i];j<ii[i+1];j++) 708 if (marks[jj[j]] && !mark) 709 mark = marks[jj[j]]; 710 711 /* not relevant */ 712 if (!mark) continue; 713 714 /* import extended row */ 715 mark--; 716 start = mark*extmem+extrowcum[mark]; 717 size = ii[i+1]-ii[i]; 718 #if defined(PETSC_USE_DEBUG) 719 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 720 #endif 721 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 722 extrowcum[mark] += size; 723 } 724 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 725 cum = 0; 726 for (i=0;i<nee;i++) { 727 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 728 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 729 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 730 cum = PetscMax(cum,size); 731 } 732 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 733 ierr = PetscFree(marks);CHKERRQ(ierr); 734 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 735 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 736 737 /* Workspace for lapack inner calls and VecSetValues */ 738 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 739 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 740 for (i=0;i<maxsize;i++) vals[i] = 1.; 741 742 /* Create vectors for quadrature rules */ 743 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 744 for (i=0;i<nquads;i++) { 745 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 746 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 747 } 748 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 749 750 /* Create change of basis matrix (preallocation can be improved) */ 751 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 752 ierr = MatSetSizes(T,Le,Le,Ne,Ne);CHKERRQ(ierr); 753 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 754 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 755 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 756 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 757 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 758 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 759 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 760 761 /* Defaults to identity */ 762 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 763 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 764 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 765 766 for (i=0;i<nee;i++) { 767 Mat Gins = NULL, GKins = NULL; 768 769 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 770 if (Gins && GKins) { 771 PetscScalar *data; 772 const PetscInt *rows,*cols; 773 PetscInt nrh,nch,nrc,ncc; 774 775 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 776 /* H1 */ 777 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 778 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 779 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 780 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 781 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 782 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 783 /* complement */ 784 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 785 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 786 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 787 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 788 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 789 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 790 /* Gins kernel quadratures */ 791 for (j=0;j<ncc;j++) { 792 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 793 } 794 /* H1 average */ 795 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 796 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 797 } 798 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 799 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 800 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 801 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 802 } 803 804 /* Start assembling */ 805 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 for (i=0;i<nquads;i++) { 807 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 808 } 809 810 /* Free */ 811 ierr = PetscFree(extrow);CHKERRQ(ierr); 812 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 813 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 814 ierr = PetscFree(vals);CHKERRQ(ierr); 815 ierr = PetscFree(extrows);CHKERRQ(ierr); 816 ierr = PetscFree(extcols);CHKERRQ(ierr); 817 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 818 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 819 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 820 ierr = MatDestroy(&lG);CHKERRQ(ierr); 821 ierr = MatDestroy(&conn);CHKERRQ(ierr); 822 823 /* Complete assembling */ 824 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 825 for (i=0;i<nquads;i++) { 826 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 827 } 828 for (i=0;i<nquads;i++) { 829 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 830 } 831 ierr = PetscFree(quads);CHKERRQ(ierr); 832 833 /* tell PCBDDC the topography has been analyzed */ 834 pcbddc->recompute_topography = PETSC_FALSE; 835 836 /* set change of basis */ 837 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 838 ierr = MatDestroy(&T);CHKERRQ(ierr); 839 840 /* set quadratures */ 841 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 842 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 843 844 PetscFunctionReturn(0); 845 } 846 847 /* the near-null space of BDDC carries information on quadrature weights, 848 and these can be collinear -> so cheat with MatNullSpaceCreate 849 and create a suitable set of basis vectors first */ 850 #undef __FUNCT__ 851 #define __FUNCT__ "PCBDDCNullSpaceCreate" 852 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 853 { 854 PetscErrorCode ierr; 855 PetscInt i; 856 857 PetscFunctionBegin; 858 for (i=0;i<nvecs;i++) { 859 PetscInt first,last; 860 861 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 862 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 863 if (i>=first && i < last) { 864 PetscScalar *data; 865 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 866 if (!has_const) { 867 data[i-first] = 1.; 868 } else { 869 data[2*i-first] = 1./PetscSqrtReal(2.); 870 data[2*i-first+1] = -1./PetscSqrtReal(2.); 871 } 872 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 873 } 874 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 875 } 876 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 877 for (i=0;i<nvecs;i++) { /* reset vectors */ 878 PetscInt first,last; 879 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 880 if (i>=first && i < last) { 881 PetscScalar *data; 882 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 883 if (!has_const) { 884 data[i-first] = 0.; 885 } else { 886 data[2*i-first] = 0.; 887 data[2*i-first+1] = 0.; 888 } 889 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 890 } 891 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 892 } 893 PetscFunctionReturn(0); 894 } 895 896 #undef __FUNCT__ 897 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 898 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 899 { 900 Mat loc_divudotp; 901 Vec p,v,vins,quad_vec,*quad_vecs; 902 ISLocalToGlobalMapping map; 903 IS *faces,*edges; 904 PetscScalar *vals; 905 const PetscScalar *array; 906 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 907 PetscMPIInt rank; 908 PetscErrorCode ierr; 909 910 PetscFunctionBegin; 911 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 912 if (graph->twodim) { 913 lmaxneighs = 2; 914 } else { 915 lmaxneighs = 1; 916 for (i=0;i<ne;i++) { 917 const PetscInt *idxs; 918 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 919 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 920 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 921 } 922 lmaxneighs++; /* graph count does not include self */ 923 } 924 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 925 maxsize = 0; 926 for (i=0;i<ne;i++) { 927 PetscInt nn; 928 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 929 maxsize = PetscMax(maxsize,nn); 930 } 931 for (i=0;i<nf;i++) { 932 PetscInt nn; 933 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 934 maxsize = PetscMax(maxsize,nn); 935 } 936 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 937 /* create vectors to hold quadrature weights */ 938 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 939 if (!transpose) { 940 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 941 } else { 942 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 943 } 944 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 945 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 946 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 947 for (i=0;i<maxneighs;i++) { 948 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 949 } 950 951 /* compute local quad vec */ 952 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 953 if (!transpose) { 954 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 955 } else { 956 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 957 } 958 ierr = VecSet(p,1.);CHKERRQ(ierr); 959 if (!transpose) { 960 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 961 } else { 962 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 963 } 964 if (vl2l) { 965 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 966 } else { 967 vins = v; 968 } 969 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 970 ierr = VecDestroy(&p);CHKERRQ(ierr); 971 972 /* insert in global quadrature vecs */ 973 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 974 for (i=0;i<nf;i++) { 975 const PetscInt *idxs; 976 PetscInt idx,nn,j; 977 978 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 979 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 980 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 981 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 982 idx = -(idx+1); 983 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 984 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 985 } 986 for (i=0;i<ne;i++) { 987 const PetscInt *idxs; 988 PetscInt idx,nn,j; 989 990 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 991 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 992 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 993 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 994 idx = -(idx+1); 995 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 996 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 997 } 998 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 999 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1000 if (vl2l) { 1001 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1002 } 1003 ierr = VecDestroy(&v);CHKERRQ(ierr); 1004 ierr = PetscFree(vals);CHKERRQ(ierr); 1005 1006 /* assemble near null space */ 1007 for (i=0;i<maxneighs;i++) { 1008 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1009 } 1010 for (i=0;i<maxneighs;i++) { 1011 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1012 } 1013 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1014 PetscFunctionReturn(0); 1015 } 1016 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1020 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1021 { 1022 PetscErrorCode ierr; 1023 Vec local,global; 1024 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1025 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1026 1027 PetscFunctionBegin; 1028 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1029 /* need to convert from global to local topology information and remove references to information in global ordering */ 1030 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1031 if (pcbddc->user_provided_isfordofs) { 1032 if (pcbddc->n_ISForDofs) { 1033 PetscInt i; 1034 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1035 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1036 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1037 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1038 } 1039 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1040 pcbddc->n_ISForDofs = 0; 1041 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1042 } 1043 } else { 1044 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1045 PetscInt i, n = matis->A->rmap->n; 1046 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1047 if (i > 1) { 1048 pcbddc->n_ISForDofsLocal = i; 1049 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1050 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1051 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1052 } 1053 } 1054 } 1055 } 1056 1057 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1058 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1059 } 1060 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1061 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1062 } 1063 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1064 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1065 } 1066 ierr = VecDestroy(&global);CHKERRQ(ierr); 1067 ierr = VecDestroy(&local);CHKERRQ(ierr); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1073 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1074 { 1075 PC_IS *pcis = (PC_IS*)(pc->data); 1076 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1077 PetscErrorCode ierr; 1078 1079 PetscFunctionBegin; 1080 if (!pcbddc->benign_have_null) { 1081 PetscFunctionReturn(0); 1082 } 1083 if (pcbddc->ChangeOfBasisMatrix) { 1084 Vec swap; 1085 1086 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1087 swap = pcbddc->work_change; 1088 pcbddc->work_change = r; 1089 r = swap; 1090 } 1091 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1093 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1094 ierr = VecSet(z,0.);CHKERRQ(ierr); 1095 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1096 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1097 if (pcbddc->ChangeOfBasisMatrix) { 1098 Vec swap; 1099 1100 swap = r; 1101 r = pcbddc->work_change; 1102 pcbddc->work_change = swap; 1103 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1104 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1111 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1112 { 1113 PCBDDCBenignMatMult_ctx ctx; 1114 PetscErrorCode ierr; 1115 PetscBool apply_right,apply_left,reset_x; 1116 1117 PetscFunctionBegin; 1118 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1119 if (transpose) { 1120 apply_right = ctx->apply_left; 1121 apply_left = ctx->apply_right; 1122 } else { 1123 apply_right = ctx->apply_right; 1124 apply_left = ctx->apply_left; 1125 } 1126 reset_x = PETSC_FALSE; 1127 if (apply_right) { 1128 const PetscScalar *ax; 1129 PetscInt nl,i; 1130 1131 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1132 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1133 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1134 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1135 for (i=0;i<ctx->benign_n;i++) { 1136 PetscScalar sum,val; 1137 const PetscInt *idxs; 1138 PetscInt nz,j; 1139 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1140 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1141 sum = 0.; 1142 if (ctx->apply_p0) { 1143 val = ctx->work[idxs[nz-1]]; 1144 for (j=0;j<nz-1;j++) { 1145 sum += ctx->work[idxs[j]]; 1146 ctx->work[idxs[j]] += val; 1147 } 1148 } else { 1149 for (j=0;j<nz-1;j++) { 1150 sum += ctx->work[idxs[j]]; 1151 } 1152 } 1153 ctx->work[idxs[nz-1]] -= sum; 1154 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1155 } 1156 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1157 reset_x = PETSC_TRUE; 1158 } 1159 if (transpose) { 1160 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1161 } else { 1162 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1163 } 1164 if (reset_x) { 1165 ierr = VecResetArray(x);CHKERRQ(ierr); 1166 } 1167 if (apply_left) { 1168 PetscScalar *ay; 1169 PetscInt i; 1170 1171 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1172 for (i=0;i<ctx->benign_n;i++) { 1173 PetscScalar sum,val; 1174 const PetscInt *idxs; 1175 PetscInt nz,j; 1176 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1177 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1178 val = -ay[idxs[nz-1]]; 1179 if (ctx->apply_p0) { 1180 sum = 0.; 1181 for (j=0;j<nz-1;j++) { 1182 sum += ay[idxs[j]]; 1183 ay[idxs[j]] += val; 1184 } 1185 ay[idxs[nz-1]] += sum; 1186 } else { 1187 for (j=0;j<nz-1;j++) { 1188 ay[idxs[j]] += val; 1189 } 1190 ay[idxs[nz-1]] = 0.; 1191 } 1192 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1193 } 1194 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1195 } 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1201 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1202 { 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1212 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1213 { 1214 PetscErrorCode ierr; 1215 1216 PetscFunctionBegin; 1217 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "PCBDDCBenignShellMat" 1223 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1224 { 1225 PC_IS *pcis = (PC_IS*)pc->data; 1226 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1227 PCBDDCBenignMatMult_ctx ctx; 1228 PetscErrorCode ierr; 1229 1230 PetscFunctionBegin; 1231 if (!restore) { 1232 Mat A_IB,A_BI; 1233 PetscScalar *work; 1234 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1235 1236 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1237 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1238 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1239 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1240 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1241 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1242 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1243 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1244 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1245 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1246 ctx->apply_left = PETSC_TRUE; 1247 ctx->apply_right = PETSC_FALSE; 1248 ctx->apply_p0 = PETSC_FALSE; 1249 ctx->benign_n = pcbddc->benign_n; 1250 if (reuse) { 1251 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1252 ctx->free = PETSC_FALSE; 1253 } else { /* TODO: could be optimized for successive solves */ 1254 ISLocalToGlobalMapping N_to_D; 1255 PetscInt i; 1256 1257 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1258 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1259 for (i=0;i<pcbddc->benign_n;i++) { 1260 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1261 } 1262 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1263 ctx->free = PETSC_TRUE; 1264 } 1265 ctx->A = pcis->A_IB; 1266 ctx->work = work; 1267 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1268 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1269 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1270 pcis->A_IB = A_IB; 1271 1272 /* A_BI as A_IB^T */ 1273 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1274 pcbddc->benign_original_mat = pcis->A_BI; 1275 pcis->A_BI = A_BI; 1276 } else { 1277 if (!pcbddc->benign_original_mat) { 1278 PetscFunctionReturn(0); 1279 } 1280 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1281 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1282 pcis->A_IB = ctx->A; 1283 ctx->A = NULL; 1284 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1285 pcis->A_BI = pcbddc->benign_original_mat; 1286 pcbddc->benign_original_mat = NULL; 1287 if (ctx->free) { 1288 PetscInt i; 1289 for (i=0;i<ctx->benign_n;i++) { 1290 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1291 } 1292 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1293 } 1294 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1295 ierr = PetscFree(ctx);CHKERRQ(ierr); 1296 } 1297 PetscFunctionReturn(0); 1298 } 1299 1300 /* used just in bddc debug mode */ 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "PCBDDCBenignProject" 1303 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1304 { 1305 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1306 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1307 Mat An; 1308 PetscErrorCode ierr; 1309 1310 PetscFunctionBegin; 1311 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1312 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1313 if (is1) { 1314 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1315 ierr = MatDestroy(&An);CHKERRQ(ierr); 1316 } else { 1317 *B = An; 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 /* TODO: add reuse flag */ 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatSeqAIJCompress" 1325 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1326 { 1327 Mat Bt; 1328 PetscScalar *a,*bdata; 1329 const PetscInt *ii,*ij; 1330 PetscInt m,n,i,nnz,*bii,*bij; 1331 PetscBool flg_row; 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1336 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1337 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1338 nnz = n; 1339 for (i=0;i<ii[n];i++) { 1340 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1341 } 1342 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1343 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1344 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1345 nnz = 0; 1346 bii[0] = 0; 1347 for (i=0;i<n;i++) { 1348 PetscInt j; 1349 for (j=ii[i];j<ii[i+1];j++) { 1350 PetscScalar entry = a[j]; 1351 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1352 bij[nnz] = ij[j]; 1353 bdata[nnz] = entry; 1354 nnz++; 1355 } 1356 } 1357 bii[i+1] = nnz; 1358 } 1359 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1360 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1361 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1362 { 1363 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1364 b->free_a = PETSC_TRUE; 1365 b->free_ij = PETSC_TRUE; 1366 } 1367 *B = Bt; 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatDetectDisconnectedComponents" 1373 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1374 { 1375 Mat B; 1376 IS is_dummy,*cc_n; 1377 ISLocalToGlobalMapping l2gmap_dummy; 1378 PCBDDCGraph graph; 1379 PetscInt i,n; 1380 PetscInt *xadj,*adjncy; 1381 PetscInt *xadj_filtered,*adjncy_filtered; 1382 PetscBool flg_row,isseqaij; 1383 PetscErrorCode ierr; 1384 1385 PetscFunctionBegin; 1386 if (!A->rmap->N || !A->cmap->N) { 1387 *ncc = 0; 1388 *cc = NULL; 1389 PetscFunctionReturn(0); 1390 } 1391 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1392 if (!isseqaij && filter) { 1393 PetscBool isseqdense; 1394 1395 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1396 if (!isseqdense) { 1397 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1398 } else { /* TODO: rectangular case and LDA */ 1399 PetscScalar *array; 1400 PetscReal chop=1.e-6; 1401 1402 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1403 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1404 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1405 for (i=0;i<n;i++) { 1406 PetscInt j; 1407 for (j=i+1;j<n;j++) { 1408 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1409 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1410 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1411 } 1412 } 1413 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1414 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1415 } 1416 } else { 1417 B = A; 1418 } 1419 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1420 1421 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1422 if (filter) { 1423 PetscScalar *data; 1424 PetscInt j,cum; 1425 1426 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1427 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1428 cum = 0; 1429 for (i=0;i<n;i++) { 1430 PetscInt t; 1431 1432 for (j=xadj[i];j<xadj[i+1];j++) { 1433 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1434 continue; 1435 } 1436 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1437 } 1438 t = xadj_filtered[i]; 1439 xadj_filtered[i] = cum; 1440 cum += t; 1441 } 1442 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1443 } else { 1444 xadj_filtered = NULL; 1445 adjncy_filtered = NULL; 1446 } 1447 1448 /* compute local connected components using PCBDDCGraph */ 1449 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1450 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1451 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1452 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1453 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1454 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1455 if (xadj_filtered) { 1456 graph->xadj = xadj_filtered; 1457 graph->adjncy = adjncy_filtered; 1458 } else { 1459 graph->xadj = xadj; 1460 graph->adjncy = adjncy; 1461 } 1462 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1463 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1464 /* partial clean up */ 1465 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1466 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1467 if (A != B) { 1468 ierr = MatDestroy(&B);CHKERRQ(ierr); 1469 } 1470 1471 /* get back data */ 1472 if (ncc) *ncc = graph->ncc; 1473 if (cc) { 1474 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1475 for (i=0;i<graph->ncc;i++) { 1476 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1477 } 1478 *cc = cc_n; 1479 } 1480 /* clean up graph */ 1481 graph->xadj = 0; 1482 graph->adjncy = 0; 1483 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "PCBDDCBenignCheck" 1489 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1490 { 1491 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1492 PC_IS* pcis = (PC_IS*)(pc->data); 1493 IS dirIS = NULL; 1494 PetscInt i; 1495 PetscErrorCode ierr; 1496 1497 PetscFunctionBegin; 1498 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1499 if (zerodiag) { 1500 Mat A; 1501 Vec vec3_N; 1502 PetscScalar *vals; 1503 const PetscInt *idxs; 1504 PetscInt nz,*count; 1505 1506 /* p0 */ 1507 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1508 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1509 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1510 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1511 for (i=0;i<nz;i++) vals[i] = 1.; 1512 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1513 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1514 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1515 /* v_I */ 1516 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1517 for (i=0;i<nz;i++) vals[i] = 0.; 1518 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1519 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1520 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1521 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1522 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1523 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1524 if (dirIS) { 1525 PetscInt n; 1526 1527 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1528 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1529 for (i=0;i<n;i++) vals[i] = 0.; 1530 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1531 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1532 } 1533 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1534 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1535 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1536 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1537 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1538 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1539 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1540 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1541 ierr = PetscFree(vals);CHKERRQ(ierr); 1542 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1543 1544 /* there should not be any pressure dofs lying on the interface */ 1545 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1546 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1547 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1548 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1549 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1550 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1551 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1552 ierr = PetscFree(count);CHKERRQ(ierr); 1553 } 1554 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1555 1556 /* check PCBDDCBenignGetOrSetP0 */ 1557 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1558 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1559 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1560 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1561 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1562 for (i=0;i<pcbddc->benign_n;i++) { 1563 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1564 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1565 } 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1571 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1572 { 1573 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1574 IS pressures,zerodiag,*zerodiag_subs; 1575 PetscInt nz,n; 1576 PetscInt *interior_dofs,n_interior_dofs; 1577 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1578 PetscErrorCode ierr; 1579 1580 PetscFunctionBegin; 1581 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1582 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1583 for (n=0;n<pcbddc->benign_n;n++) { 1584 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1585 } 1586 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1587 pcbddc->benign_n = 0; 1588 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1589 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1590 Checks if all the pressure dofs in each subdomain have a zero diagonal 1591 If not, a change of basis on pressures is not needed 1592 since the local Schur complements are already SPD 1593 */ 1594 has_null_pressures = PETSC_TRUE; 1595 have_null = PETSC_TRUE; 1596 if (pcbddc->n_ISForDofsLocal) { 1597 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1598 1599 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1600 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1601 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1602 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1603 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1604 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1605 if (!sorted) { 1606 ierr = ISSort(pressures);CHKERRQ(ierr); 1607 } 1608 } else { 1609 pressures = NULL; 1610 } 1611 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1612 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1613 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1614 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1615 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1616 if (!sorted) { 1617 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1618 } 1619 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1620 if (!nz) { 1621 if (n) have_null = PETSC_FALSE; 1622 has_null_pressures = PETSC_FALSE; 1623 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1624 } 1625 recompute_zerodiag = PETSC_FALSE; 1626 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1627 zerodiag_subs = NULL; 1628 pcbddc->benign_n = 0; 1629 n_interior_dofs = 0; 1630 interior_dofs = NULL; 1631 if (pcbddc->current_level) { /* need to compute interior nodes */ 1632 PetscInt n,i,j; 1633 PetscInt n_neigh,*neigh,*n_shared,**shared; 1634 PetscInt *iwork; 1635 1636 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1637 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1638 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1639 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1640 for (i=1;i<n_neigh;i++) 1641 for (j=0;j<n_shared[i];j++) 1642 iwork[shared[i][j]] += 1; 1643 for (i=0;i<n;i++) 1644 if (!iwork[i]) 1645 interior_dofs[n_interior_dofs++] = i; 1646 ierr = PetscFree(iwork);CHKERRQ(ierr); 1647 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1648 } 1649 if (has_null_pressures) { 1650 IS *subs; 1651 PetscInt nsubs,i,j,nl; 1652 const PetscInt *idxs; 1653 PetscScalar *array; 1654 Vec *work; 1655 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1656 1657 subs = pcbddc->local_subs; 1658 nsubs = pcbddc->n_local_subs; 1659 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1660 if (pcbddc->current_level) { 1661 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1662 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1663 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1664 /* work[0] = 1_p */ 1665 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1666 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1667 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1668 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1669 /* work[0] = 1_v */ 1670 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1671 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1672 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1673 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1674 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1675 } 1676 if (nsubs > 1) { 1677 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1678 for (i=0;i<nsubs;i++) { 1679 ISLocalToGlobalMapping l2g; 1680 IS t_zerodiag_subs; 1681 PetscInt nl; 1682 1683 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1684 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1685 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1686 if (nl) { 1687 PetscBool valid = PETSC_TRUE; 1688 1689 if (pcbddc->current_level) { 1690 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1691 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1692 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1693 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1694 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1695 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1696 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1697 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1698 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1699 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1700 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1701 for (j=0;j<n_interior_dofs;j++) { 1702 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1703 valid = PETSC_FALSE; 1704 break; 1705 } 1706 } 1707 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1708 } 1709 if (valid && pcbddc->NeumannBoundariesLocal) { 1710 IS t_bc; 1711 PetscInt nzb; 1712 1713 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1714 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1715 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1716 if (nzb) valid = PETSC_FALSE; 1717 } 1718 if (valid && pressures) { 1719 IS t_pressure_subs; 1720 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1721 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1722 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1723 } 1724 if (valid) { 1725 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1726 pcbddc->benign_n++; 1727 } else { 1728 recompute_zerodiag = PETSC_TRUE; 1729 } 1730 } 1731 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1732 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1733 } 1734 } else { /* there's just one subdomain (or zero if they have not been detected */ 1735 PetscBool valid = PETSC_TRUE; 1736 1737 if (pcbddc->NeumannBoundariesLocal) { 1738 PetscInt nzb; 1739 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1740 if (nzb) valid = PETSC_FALSE; 1741 } 1742 if (valid && pressures) { 1743 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1744 } 1745 if (valid && pcbddc->current_level) { 1746 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1747 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1748 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1749 for (j=0;j<n_interior_dofs;j++) { 1750 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1751 valid = PETSC_FALSE; 1752 break; 1753 } 1754 } 1755 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1756 } 1757 if (valid) { 1758 pcbddc->benign_n = 1; 1759 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1760 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1761 zerodiag_subs[0] = zerodiag; 1762 } 1763 } 1764 if (pcbddc->current_level) { 1765 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1766 } 1767 } 1768 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1769 1770 if (!pcbddc->benign_n) { 1771 PetscInt n; 1772 1773 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1774 recompute_zerodiag = PETSC_FALSE; 1775 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1776 if (n) { 1777 has_null_pressures = PETSC_FALSE; 1778 have_null = PETSC_FALSE; 1779 } 1780 } 1781 1782 /* final check for null pressures */ 1783 if (zerodiag && pressures) { 1784 PetscInt nz,np; 1785 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1786 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 1787 if (nz != np) have_null = PETSC_FALSE; 1788 } 1789 1790 if (recompute_zerodiag) { 1791 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1792 if (pcbddc->benign_n == 1) { 1793 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 1794 zerodiag = zerodiag_subs[0]; 1795 } else { 1796 PetscInt i,nzn,*new_idxs; 1797 1798 nzn = 0; 1799 for (i=0;i<pcbddc->benign_n;i++) { 1800 PetscInt ns; 1801 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1802 nzn += ns; 1803 } 1804 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 1805 nzn = 0; 1806 for (i=0;i<pcbddc->benign_n;i++) { 1807 PetscInt ns,*idxs; 1808 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1809 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1810 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 1811 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1812 nzn += ns; 1813 } 1814 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 1815 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 1816 } 1817 have_null = PETSC_FALSE; 1818 } 1819 1820 /* Prepare matrix to compute no-net-flux */ 1821 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 1822 Mat A,loc_divudotp; 1823 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 1824 IS row,col,isused = NULL; 1825 PetscInt M,N,n,st,n_isused; 1826 1827 if (pressures) { 1828 isused = pressures; 1829 } else { 1830 isused = zerodiag; 1831 } 1832 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 1833 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1834 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 1835 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 1836 n_isused = 0; 1837 if (isused) { 1838 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 1839 } 1840 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1841 st = st-n_isused; 1842 if (n) { 1843 const PetscInt *gidxs; 1844 1845 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 1846 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1847 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 1848 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1849 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1850 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1851 } else { 1852 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 1853 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1854 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1855 } 1856 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 1857 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 1858 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 1859 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 1860 ierr = ISDestroy(&row);CHKERRQ(ierr); 1861 ierr = ISDestroy(&col);CHKERRQ(ierr); 1862 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 1863 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1864 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1865 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1866 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1867 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1868 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1869 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1870 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1871 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1872 } 1873 1874 /* change of basis and p0 dofs */ 1875 if (has_null_pressures) { 1876 IS zerodiagc; 1877 const PetscInt *idxs,*idxsc; 1878 PetscInt i,s,*nnz; 1879 1880 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1881 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1882 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1883 /* local change of basis for pressures */ 1884 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1885 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1886 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1887 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1888 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1889 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1890 for (i=0;i<pcbddc->benign_n;i++) { 1891 PetscInt nzs,j; 1892 1893 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1894 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1895 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1896 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1897 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1898 } 1899 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1900 ierr = PetscFree(nnz);CHKERRQ(ierr); 1901 /* set identity on velocities */ 1902 for (i=0;i<n-nz;i++) { 1903 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1904 } 1905 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1906 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1907 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1908 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1909 /* set change on pressures */ 1910 for (s=0;s<pcbddc->benign_n;s++) { 1911 PetscScalar *array; 1912 PetscInt nzs; 1913 1914 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1915 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1916 for (i=0;i<nzs-1;i++) { 1917 PetscScalar vals[2]; 1918 PetscInt cols[2]; 1919 1920 cols[0] = idxs[i]; 1921 cols[1] = idxs[nzs-1]; 1922 vals[0] = 1.; 1923 vals[1] = 1.; 1924 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1925 } 1926 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1927 for (i=0;i<nzs-1;i++) array[i] = -1.; 1928 array[nzs-1] = 1.; 1929 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1930 /* store local idxs for p0 */ 1931 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1932 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1933 ierr = PetscFree(array);CHKERRQ(ierr); 1934 } 1935 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1936 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1937 /* project if needed */ 1938 if (pcbddc->benign_change_explicit) { 1939 Mat M; 1940 1941 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1942 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1943 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1944 ierr = MatDestroy(&M);CHKERRQ(ierr); 1945 } 1946 /* store global idxs for p0 */ 1947 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1948 } 1949 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1950 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1951 1952 /* determines if the coarse solver will be singular or not */ 1953 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1954 /* determines if the problem has subdomains with 0 pressure block */ 1955 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1956 *zerodiaglocal = zerodiag; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1962 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1963 { 1964 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1965 PetscScalar *array; 1966 PetscErrorCode ierr; 1967 1968 PetscFunctionBegin; 1969 if (!pcbddc->benign_sf) { 1970 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1971 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1972 } 1973 if (get) { 1974 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1975 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1976 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1977 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1978 } else { 1979 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1980 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1981 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1982 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1983 } 1984 PetscFunctionReturn(0); 1985 } 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1989 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1990 { 1991 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 /* TODO: add error checking 1996 - avoid nested pop (or push) calls. 1997 - cannot push before pop. 1998 - cannot call this if pcbddc->local_mat is NULL 1999 */ 2000 if (!pcbddc->benign_n) { 2001 PetscFunctionReturn(0); 2002 } 2003 if (pop) { 2004 if (pcbddc->benign_change_explicit) { 2005 IS is_p0; 2006 MatReuse reuse; 2007 2008 /* extract B_0 */ 2009 reuse = MAT_INITIAL_MATRIX; 2010 if (pcbddc->benign_B0) { 2011 reuse = MAT_REUSE_MATRIX; 2012 } 2013 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2014 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2015 /* remove rows and cols from local problem */ 2016 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2017 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2018 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2019 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2020 } else { 2021 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2022 PetscScalar *vals; 2023 PetscInt i,n,*idxs_ins; 2024 2025 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2026 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2027 if (!pcbddc->benign_B0) { 2028 PetscInt *nnz; 2029 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2030 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2031 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2032 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2033 for (i=0;i<pcbddc->benign_n;i++) { 2034 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2035 nnz[i] = n - nnz[i]; 2036 } 2037 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2038 ierr = PetscFree(nnz);CHKERRQ(ierr); 2039 } 2040 2041 for (i=0;i<pcbddc->benign_n;i++) { 2042 PetscScalar *array; 2043 PetscInt *idxs,j,nz,cum; 2044 2045 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2046 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2047 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2048 for (j=0;j<nz;j++) vals[j] = 1.; 2049 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2050 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2051 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2052 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2053 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2054 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2055 cum = 0; 2056 for (j=0;j<n;j++) { 2057 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2058 vals[cum] = array[j]; 2059 idxs_ins[cum] = j; 2060 cum++; 2061 } 2062 } 2063 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2064 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2065 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2066 } 2067 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2070 } 2071 } else { /* push */ 2072 if (pcbddc->benign_change_explicit) { 2073 PetscInt i; 2074 2075 for (i=0;i<pcbddc->benign_n;i++) { 2076 PetscScalar *B0_vals; 2077 PetscInt *B0_cols,B0_ncol; 2078 2079 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2080 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2081 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2082 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2083 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2084 } 2085 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2086 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 } else { 2088 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2089 } 2090 } 2091 PetscFunctionReturn(0); 2092 } 2093 2094 #undef __FUNCT__ 2095 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2096 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2097 { 2098 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2099 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2100 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2101 PetscBLASInt *B_iwork,*B_ifail; 2102 PetscScalar *work,lwork; 2103 PetscScalar *St,*S,*eigv; 2104 PetscScalar *Sarray,*Starray; 2105 PetscReal *eigs,thresh; 2106 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2107 PetscBool allocated_S_St; 2108 #if defined(PETSC_USE_COMPLEX) 2109 PetscReal *rwork; 2110 #endif 2111 PetscErrorCode ierr; 2112 2113 PetscFunctionBegin; 2114 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2115 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2116 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2117 2118 if (pcbddc->dbg_flag) { 2119 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2121 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2122 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2123 } 2124 2125 if (pcbddc->dbg_flag) { 2126 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2127 } 2128 2129 /* max size of subsets */ 2130 mss = 0; 2131 for (i=0;i<sub_schurs->n_subs;i++) { 2132 PetscInt subset_size; 2133 2134 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2135 mss = PetscMax(mss,subset_size); 2136 } 2137 2138 /* min/max and threshold */ 2139 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2140 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2141 nmax = PetscMax(nmin,nmax); 2142 allocated_S_St = PETSC_FALSE; 2143 if (nmin) { 2144 allocated_S_St = PETSC_TRUE; 2145 } 2146 2147 /* allocate lapack workspace */ 2148 cum = cum2 = 0; 2149 maxneigs = 0; 2150 for (i=0;i<sub_schurs->n_subs;i++) { 2151 PetscInt n,subset_size; 2152 2153 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2154 n = PetscMin(subset_size,nmax); 2155 cum += subset_size; 2156 cum2 += subset_size*n; 2157 maxneigs = PetscMax(maxneigs,n); 2158 } 2159 if (mss) { 2160 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2161 PetscBLASInt B_itype = 1; 2162 PetscBLASInt B_N = mss; 2163 PetscReal zero = 0.0; 2164 PetscReal eps = 0.0; /* dlamch? */ 2165 2166 B_lwork = -1; 2167 S = NULL; 2168 St = NULL; 2169 eigs = NULL; 2170 eigv = NULL; 2171 B_iwork = NULL; 2172 B_ifail = NULL; 2173 #if defined(PETSC_USE_COMPLEX) 2174 rwork = NULL; 2175 #endif 2176 thresh = 1.0; 2177 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2178 #if defined(PETSC_USE_COMPLEX) 2179 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2180 #else 2181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2182 #endif 2183 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2184 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2185 } else { 2186 /* TODO */ 2187 } 2188 } else { 2189 lwork = 0; 2190 } 2191 2192 nv = 0; 2193 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2194 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2195 } 2196 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2197 if (allocated_S_St) { 2198 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2199 } 2200 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2201 #if defined(PETSC_USE_COMPLEX) 2202 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2203 #endif 2204 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2205 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2206 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2207 nv+cum,&pcbddc->adaptive_constraints_idxs, 2208 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2209 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2210 2211 maxneigs = 0; 2212 cum = cumarray = 0; 2213 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2214 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2215 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2216 const PetscInt *idxs; 2217 2218 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2219 for (cum=0;cum<nv;cum++) { 2220 pcbddc->adaptive_constraints_n[cum] = 1; 2221 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2222 pcbddc->adaptive_constraints_data[cum] = 1.0; 2223 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2224 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2225 } 2226 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2227 } 2228 2229 if (mss) { /* multilevel */ 2230 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2231 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2232 } 2233 2234 thresh = pcbddc->adaptive_threshold; 2235 for (i=0;i<sub_schurs->n_subs;i++) { 2236 const PetscInt *idxs; 2237 PetscReal upper,lower; 2238 PetscInt j,subset_size,eigs_start = 0; 2239 PetscBLASInt B_N; 2240 PetscBool same_data = PETSC_FALSE; 2241 2242 if (pcbddc->use_deluxe_scaling) { 2243 upper = PETSC_MAX_REAL; 2244 lower = thresh; 2245 } else { 2246 upper = 1./thresh; 2247 lower = 0.; 2248 } 2249 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2250 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2251 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2252 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2253 if (sub_schurs->is_hermitian) { 2254 PetscInt j,k; 2255 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2256 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2257 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2258 } 2259 for (j=0;j<subset_size;j++) { 2260 for (k=j;k<subset_size;k++) { 2261 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2262 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2263 } 2264 } 2265 } else { 2266 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2267 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2268 } 2269 } else { 2270 S = Sarray + cumarray; 2271 St = Starray + cumarray; 2272 } 2273 /* see if we can save some work */ 2274 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2275 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2276 } 2277 2278 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2279 B_neigs = 0; 2280 } else { 2281 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2282 PetscBLASInt B_itype = 1; 2283 PetscBLASInt B_IL, B_IU; 2284 PetscReal eps = -1.0; /* dlamch? */ 2285 PetscInt nmin_s; 2286 PetscBool compute_range = PETSC_FALSE; 2287 2288 if (pcbddc->dbg_flag) { 2289 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2290 } 2291 2292 compute_range = PETSC_FALSE; 2293 if (thresh > 1.+PETSC_SMALL && !same_data) { 2294 compute_range = PETSC_TRUE; 2295 } 2296 2297 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2298 if (compute_range) { 2299 2300 /* ask for eigenvalues larger than thresh */ 2301 #if defined(PETSC_USE_COMPLEX) 2302 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2303 #else 2304 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2305 #endif 2306 } else if (!same_data) { 2307 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2308 B_IL = 1; 2309 #if defined(PETSC_USE_COMPLEX) 2310 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2311 #else 2312 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2313 #endif 2314 } else { /* same_data is true, so get the adaptive function requested by the user */ 2315 PetscInt k; 2316 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2317 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2318 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2319 nmin = nmax; 2320 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2321 for (k=0;k<nmax;k++) { 2322 eigs[k] = 1./PETSC_SMALL; 2323 eigv[k*(subset_size+1)] = 1.0; 2324 } 2325 } 2326 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2327 if (B_ierr) { 2328 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2329 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2330 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2331 } 2332 2333 if (B_neigs > nmax) { 2334 if (pcbddc->dbg_flag) { 2335 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2336 } 2337 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2338 B_neigs = nmax; 2339 } 2340 2341 nmin_s = PetscMin(nmin,B_N); 2342 if (B_neigs < nmin_s) { 2343 PetscBLASInt B_neigs2; 2344 2345 if (pcbddc->use_deluxe_scaling) { 2346 B_IL = B_N - nmin_s + 1; 2347 B_IU = B_N - B_neigs; 2348 } else { 2349 B_IL = B_neigs + 1; 2350 B_IU = nmin_s; 2351 } 2352 if (pcbddc->dbg_flag) { 2353 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2354 } 2355 if (sub_schurs->is_hermitian) { 2356 PetscInt j,k; 2357 for (j=0;j<subset_size;j++) { 2358 for (k=j;k<subset_size;k++) { 2359 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2360 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2361 } 2362 } 2363 } else { 2364 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2365 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2366 } 2367 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2368 #if defined(PETSC_USE_COMPLEX) 2369 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2370 #else 2371 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2372 #endif 2373 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2374 B_neigs += B_neigs2; 2375 } 2376 if (B_ierr) { 2377 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2378 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2379 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2380 } 2381 if (pcbddc->dbg_flag) { 2382 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2383 for (j=0;j<B_neigs;j++) { 2384 if (eigs[j] == 0.0) { 2385 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2386 } else { 2387 if (pcbddc->use_deluxe_scaling) { 2388 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2389 } else { 2390 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2391 } 2392 } 2393 } 2394 } 2395 } else { 2396 /* TODO */ 2397 } 2398 } 2399 /* change the basis back to the original one */ 2400 if (sub_schurs->change) { 2401 Mat change,phi,phit; 2402 2403 if (pcbddc->dbg_flag > 1) { 2404 PetscInt ii; 2405 for (ii=0;ii<B_neigs;ii++) { 2406 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2407 for (j=0;j<B_N;j++) { 2408 #if defined(PETSC_USE_COMPLEX) 2409 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2410 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2412 #else 2413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2414 #endif 2415 } 2416 } 2417 } 2418 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2419 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2420 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2421 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2422 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2423 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2424 } 2425 maxneigs = PetscMax(B_neigs,maxneigs); 2426 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2427 if (B_neigs) { 2428 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2429 2430 if (pcbddc->dbg_flag > 1) { 2431 PetscInt ii; 2432 for (ii=0;ii<B_neigs;ii++) { 2433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2434 for (j=0;j<B_N;j++) { 2435 #if defined(PETSC_USE_COMPLEX) 2436 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2437 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2438 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2439 #else 2440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2441 #endif 2442 } 2443 } 2444 } 2445 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2446 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2447 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2448 cum++; 2449 } 2450 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2451 /* shift for next computation */ 2452 cumarray += subset_size*subset_size; 2453 } 2454 if (pcbddc->dbg_flag) { 2455 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2456 } 2457 2458 if (mss) { 2459 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2460 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2461 /* destroy matrices (junk) */ 2462 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2463 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2464 } 2465 if (allocated_S_St) { 2466 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2467 } 2468 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2469 #if defined(PETSC_USE_COMPLEX) 2470 ierr = PetscFree(rwork);CHKERRQ(ierr); 2471 #endif 2472 if (pcbddc->dbg_flag) { 2473 PetscInt maxneigs_r; 2474 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2475 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2476 } 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "PCBDDCSetUpSolvers" 2482 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2483 { 2484 PetscScalar *coarse_submat_vals; 2485 PetscErrorCode ierr; 2486 2487 PetscFunctionBegin; 2488 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2489 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2490 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2491 2492 /* Setup local neumann solver ksp_R */ 2493 /* PCBDDCSetUpLocalScatters should be called first! */ 2494 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2495 2496 /* 2497 Setup local correction and local part of coarse basis. 2498 Gives back the dense local part of the coarse matrix in column major ordering 2499 */ 2500 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2501 2502 /* Compute total number of coarse nodes and setup coarse solver */ 2503 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2504 2505 /* free */ 2506 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "PCBDDCResetCustomization" 2512 PetscErrorCode PCBDDCResetCustomization(PC pc) 2513 { 2514 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2515 PetscErrorCode ierr; 2516 2517 PetscFunctionBegin; 2518 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2519 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2520 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2521 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2522 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2523 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2524 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2525 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2526 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2527 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2528 PetscFunctionReturn(0); 2529 } 2530 2531 #undef __FUNCT__ 2532 #define __FUNCT__ "PCBDDCResetTopography" 2533 PetscErrorCode PCBDDCResetTopography(PC pc) 2534 { 2535 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2536 PetscInt i; 2537 PetscErrorCode ierr; 2538 2539 PetscFunctionBegin; 2540 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2541 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2542 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2543 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2544 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2545 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2546 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2547 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2548 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2549 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2550 pcbddc->graphanalyzed = PETSC_FALSE; 2551 for (i=0;i<pcbddc->n_local_subs;i++) { 2552 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2553 } 2554 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2555 if (pcbddc->sub_schurs) { 2556 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "PCBDDCResetSolvers" 2563 PetscErrorCode PCBDDCResetSolvers(PC pc) 2564 { 2565 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2566 PetscErrorCode ierr; 2567 2568 PetscFunctionBegin; 2569 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2570 if (pcbddc->coarse_phi_B) { 2571 PetscScalar *array; 2572 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2573 ierr = PetscFree(array);CHKERRQ(ierr); 2574 } 2575 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2576 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2577 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2578 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2579 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2580 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2581 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2582 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2583 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2584 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2585 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2586 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2587 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2588 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2589 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2590 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2591 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2592 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2593 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2594 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2595 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2596 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2597 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2598 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2599 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2600 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2601 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2602 if (pcbddc->benign_zerodiag_subs) { 2603 PetscInt i; 2604 for (i=0;i<pcbddc->benign_n;i++) { 2605 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2606 } 2607 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2608 } 2609 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2615 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2616 { 2617 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2618 PC_IS *pcis = (PC_IS*)pc->data; 2619 VecType impVecType; 2620 PetscInt n_constraints,n_R,old_size; 2621 PetscErrorCode ierr; 2622 2623 PetscFunctionBegin; 2624 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2625 /* get sizes */ 2626 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2627 n_R = pcis->n - pcbddc->n_vertices; 2628 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2629 /* local work vectors (try to avoid unneeded work)*/ 2630 /* R nodes */ 2631 old_size = -1; 2632 if (pcbddc->vec1_R) { 2633 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2634 } 2635 if (n_R != old_size) { 2636 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2637 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2638 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2639 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2640 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2641 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2642 } 2643 /* local primal dofs */ 2644 old_size = -1; 2645 if (pcbddc->vec1_P) { 2646 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2647 } 2648 if (pcbddc->local_primal_size != old_size) { 2649 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2650 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2651 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2652 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2653 } 2654 /* local explicit constraints */ 2655 old_size = -1; 2656 if (pcbddc->vec1_C) { 2657 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2658 } 2659 if (n_constraints && n_constraints != old_size) { 2660 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2661 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2662 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2663 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2664 } 2665 PetscFunctionReturn(0); 2666 } 2667 2668 #undef __FUNCT__ 2669 #define __FUNCT__ "PCBDDCSetUpCorrection" 2670 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2671 { 2672 PetscErrorCode ierr; 2673 /* pointers to pcis and pcbddc */ 2674 PC_IS* pcis = (PC_IS*)pc->data; 2675 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2676 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2677 /* submatrices of local problem */ 2678 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2679 /* submatrices of local coarse problem */ 2680 Mat S_VV,S_CV,S_VC,S_CC; 2681 /* working matrices */ 2682 Mat C_CR; 2683 /* additional working stuff */ 2684 PC pc_R; 2685 Mat F; 2686 Vec dummy_vec; 2687 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2688 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2689 PetscScalar *work; 2690 PetscInt *idx_V_B; 2691 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2692 PetscInt i,n_R,n_D,n_B; 2693 2694 /* some shortcuts to scalars */ 2695 PetscScalar one=1.0,m_one=-1.0; 2696 2697 PetscFunctionBegin; 2698 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2699 2700 /* Set Non-overlapping dimensions */ 2701 n_vertices = pcbddc->n_vertices; 2702 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2703 n_B = pcis->n_B; 2704 n_D = pcis->n - n_B; 2705 n_R = pcis->n - n_vertices; 2706 2707 /* vertices in boundary numbering */ 2708 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2709 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2710 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2711 2712 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2713 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2714 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2715 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2716 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2717 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2718 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2719 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2720 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2721 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2722 2723 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2724 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2725 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2726 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2727 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2728 lda_rhs = n_R; 2729 need_benign_correction = PETSC_FALSE; 2730 if (isLU || isILU || isCHOL) { 2731 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2732 } else if (sub_schurs && sub_schurs->reuse_solver) { 2733 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2734 MatFactorType type; 2735 2736 F = reuse_solver->F; 2737 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2738 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2739 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2740 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2741 } else { 2742 F = NULL; 2743 } 2744 2745 /* allocate workspace */ 2746 n = 0; 2747 if (n_constraints) { 2748 n += lda_rhs*n_constraints; 2749 } 2750 if (n_vertices) { 2751 n = PetscMax(2*lda_rhs*n_vertices,n); 2752 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2753 } 2754 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2755 2756 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2757 dummy_vec = NULL; 2758 if (need_benign_correction && lda_rhs != n_R && F) { 2759 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2760 } 2761 2762 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2763 if (n_constraints) { 2764 Mat M1,M2,M3,C_B; 2765 IS is_aux; 2766 PetscScalar *array,*array2; 2767 2768 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2769 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2770 2771 /* Extract constraints on R nodes: C_{CR} */ 2772 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2773 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2774 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2775 2776 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2777 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2778 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2779 for (i=0;i<n_constraints;i++) { 2780 const PetscScalar *row_cmat_values; 2781 const PetscInt *row_cmat_indices; 2782 PetscInt size_of_constraint,j; 2783 2784 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2785 for (j=0;j<size_of_constraint;j++) { 2786 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 2787 } 2788 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2789 } 2790 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 2791 if (F) { 2792 Mat B; 2793 2794 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2795 if (need_benign_correction) { 2796 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2797 2798 /* rhs is already zero on interior dofs, no need to change the rhs */ 2799 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 2800 } 2801 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 2802 if (need_benign_correction) { 2803 PetscScalar *marr; 2804 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2805 2806 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2807 if (lda_rhs != n_R) { 2808 for (i=0;i<n_constraints;i++) { 2809 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2810 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2811 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2812 } 2813 } else { 2814 for (i=0;i<n_constraints;i++) { 2815 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2816 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2817 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2818 } 2819 } 2820 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2821 } 2822 ierr = MatDestroy(&B);CHKERRQ(ierr); 2823 } else { 2824 PetscScalar *marr; 2825 2826 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2827 for (i=0;i<n_constraints;i++) { 2828 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2829 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 2830 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2831 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2832 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2833 } 2834 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2835 } 2836 if (!pcbddc->switch_static) { 2837 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2838 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2839 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2840 for (i=0;i<n_constraints;i++) { 2841 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 2842 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 2843 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2845 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2846 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2847 } 2848 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2849 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2850 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2851 } else { 2852 if (lda_rhs != n_R) { 2853 IS dummy; 2854 2855 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 2856 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2857 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 2858 } else { 2859 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 2860 pcbddc->local_auxmat2 = local_auxmat2_R; 2861 } 2862 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2863 } 2864 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2865 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2866 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 2867 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 2868 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 2869 if (isCHOL) { 2870 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2871 } else { 2872 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2873 } 2874 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2875 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2876 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2877 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2878 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2879 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2880 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2881 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2882 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2883 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2884 } 2885 2886 /* Get submatrices from subdomain matrix */ 2887 if (n_vertices) { 2888 IS is_aux; 2889 2890 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2891 IS tis; 2892 2893 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2894 ierr = ISSort(tis);CHKERRQ(ierr); 2895 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2896 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2897 } else { 2898 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2899 } 2900 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2901 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2902 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2903 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2904 } 2905 2906 /* Matrix of coarse basis functions (local) */ 2907 if (pcbddc->coarse_phi_B) { 2908 PetscInt on_B,on_primal,on_D=n_D; 2909 if (pcbddc->coarse_phi_D) { 2910 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2911 } 2912 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2913 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2914 PetscScalar *marray; 2915 2916 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2917 ierr = PetscFree(marray);CHKERRQ(ierr); 2918 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2919 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2920 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2921 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2922 } 2923 } 2924 2925 if (!pcbddc->coarse_phi_B) { 2926 PetscScalar *marray; 2927 2928 n = n_B*pcbddc->local_primal_size; 2929 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2930 n += n_D*pcbddc->local_primal_size; 2931 } 2932 if (!pcbddc->symmetric_primal) { 2933 n *= 2; 2934 } 2935 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2936 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2937 n = n_B*pcbddc->local_primal_size; 2938 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2939 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2940 n += n_D*pcbddc->local_primal_size; 2941 } 2942 if (!pcbddc->symmetric_primal) { 2943 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2944 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2945 n = n_B*pcbddc->local_primal_size; 2946 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2947 } 2948 } else { 2949 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2950 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2951 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2952 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2953 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2954 } 2955 } 2956 } 2957 2958 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2959 p0_lidx_I = NULL; 2960 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2961 const PetscInt *idxs; 2962 2963 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2964 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2965 for (i=0;i<pcbddc->benign_n;i++) { 2966 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2967 } 2968 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2969 } 2970 2971 /* vertices */ 2972 if (n_vertices) { 2973 2974 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2975 2976 if (n_R) { 2977 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2978 PetscBLASInt B_N,B_one = 1; 2979 PetscScalar *x,*y; 2980 PetscBool isseqaij; 2981 2982 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2983 if (need_benign_correction) { 2984 ISLocalToGlobalMapping RtoN; 2985 IS is_p0; 2986 PetscInt *idxs_p0,n; 2987 2988 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2989 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2990 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2991 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2992 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2993 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2994 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2995 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2996 } 2997 2998 if (lda_rhs == n_R) { 2999 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3000 } else { 3001 PetscScalar *av,*array; 3002 const PetscInt *xadj,*adjncy; 3003 PetscInt n; 3004 PetscBool flg_row; 3005 3006 array = work+lda_rhs*n_vertices; 3007 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3008 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3009 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3010 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3011 for (i=0;i<n;i++) { 3012 PetscInt j; 3013 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3014 } 3015 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3016 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3017 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3018 } 3019 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3020 if (need_benign_correction) { 3021 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3022 PetscScalar *marr; 3023 3024 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3025 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3026 3027 | 0 0 0 | (V) 3028 L = | 0 0 -1 | (P-p0) 3029 | 0 0 -1 | (p0) 3030 3031 */ 3032 for (i=0;i<reuse_solver->benign_n;i++) { 3033 const PetscScalar *vals; 3034 const PetscInt *idxs,*idxs_zero; 3035 PetscInt n,j,nz; 3036 3037 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3038 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3039 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3040 for (j=0;j<n;j++) { 3041 PetscScalar val = vals[j]; 3042 PetscInt k,col = idxs[j]; 3043 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3044 } 3045 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3046 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3047 } 3048 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3049 } 3050 if (F) { 3051 /* need to correct the rhs */ 3052 if (need_benign_correction) { 3053 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3054 PetscScalar *marr; 3055 3056 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3057 if (lda_rhs != n_R) { 3058 for (i=0;i<n_vertices;i++) { 3059 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3060 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3061 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3062 } 3063 } else { 3064 for (i=0;i<n_vertices;i++) { 3065 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3066 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3067 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3068 } 3069 } 3070 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3071 } 3072 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3073 /* need to correct the solution */ 3074 if (need_benign_correction) { 3075 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3076 PetscScalar *marr; 3077 3078 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3079 if (lda_rhs != n_R) { 3080 for (i=0;i<n_vertices;i++) { 3081 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3082 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3083 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3084 } 3085 } else { 3086 for (i=0;i<n_vertices;i++) { 3087 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3088 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3089 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3090 } 3091 } 3092 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3093 } 3094 } else { 3095 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3096 for (i=0;i<n_vertices;i++) { 3097 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3098 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3099 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3100 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3101 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3102 } 3103 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3104 } 3105 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3106 /* S_VV and S_CV */ 3107 if (n_constraints) { 3108 Mat B; 3109 3110 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3111 for (i=0;i<n_vertices;i++) { 3112 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3113 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3114 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3115 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3116 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3117 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3118 } 3119 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3120 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3121 ierr = MatDestroy(&B);CHKERRQ(ierr); 3122 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3123 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3124 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3125 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3126 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3127 ierr = MatDestroy(&B);CHKERRQ(ierr); 3128 } 3129 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3130 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3131 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3132 } 3133 if (lda_rhs != n_R) { 3134 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3135 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3136 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3137 } 3138 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3139 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3140 if (need_benign_correction) { 3141 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3142 PetscScalar *marr,*sums; 3143 3144 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3145 ierr = MatDenseGetArray(S_VVt,&marr); 3146 for (i=0;i<reuse_solver->benign_n;i++) { 3147 const PetscScalar *vals; 3148 const PetscInt *idxs,*idxs_zero; 3149 PetscInt n,j,nz; 3150 3151 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3152 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3153 for (j=0;j<n_vertices;j++) { 3154 PetscInt k; 3155 sums[j] = 0.; 3156 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3157 } 3158 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3159 for (j=0;j<n;j++) { 3160 PetscScalar val = vals[j]; 3161 PetscInt k; 3162 for (k=0;k<n_vertices;k++) { 3163 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3164 } 3165 } 3166 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3167 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3168 } 3169 ierr = PetscFree(sums);CHKERRQ(ierr); 3170 ierr = MatDenseRestoreArray(S_VVt,&marr); 3171 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3172 } 3173 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3174 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3175 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3176 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3177 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3178 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3179 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3180 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3181 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3182 } else { 3183 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3184 } 3185 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3186 3187 /* coarse basis functions */ 3188 for (i=0;i<n_vertices;i++) { 3189 PetscScalar *y; 3190 3191 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3192 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3193 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3194 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3195 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3196 y[n_B*i+idx_V_B[i]] = 1.0; 3197 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3198 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3199 3200 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3201 PetscInt j; 3202 3203 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3204 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3205 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3206 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3207 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3208 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3209 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3210 } 3211 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3212 } 3213 /* if n_R == 0 the object is not destroyed */ 3214 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3215 } 3216 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3217 3218 if (n_constraints) { 3219 Mat B; 3220 3221 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3222 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3223 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3224 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3225 if (n_vertices) { 3226 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3227 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3228 } else { 3229 Mat S_VCt; 3230 3231 if (lda_rhs != n_R) { 3232 ierr = MatDestroy(&B);CHKERRQ(ierr); 3233 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3234 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3235 } 3236 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3237 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3238 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3239 } 3240 } 3241 ierr = MatDestroy(&B);CHKERRQ(ierr); 3242 /* coarse basis functions */ 3243 for (i=0;i<n_constraints;i++) { 3244 PetscScalar *y; 3245 3246 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3247 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3248 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3249 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3250 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3251 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3252 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3253 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3254 PetscInt j; 3255 3256 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3257 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3258 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3259 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3260 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3261 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3262 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3263 } 3264 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3265 } 3266 } 3267 if (n_constraints) { 3268 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3269 } 3270 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3271 3272 /* coarse matrix entries relative to B_0 */ 3273 if (pcbddc->benign_n) { 3274 Mat B0_B,B0_BPHI; 3275 IS is_dummy; 3276 PetscScalar *data; 3277 PetscInt j; 3278 3279 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3280 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3281 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3282 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3283 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3284 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3285 for (j=0;j<pcbddc->benign_n;j++) { 3286 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3287 for (i=0;i<pcbddc->local_primal_size;i++) { 3288 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3289 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3290 } 3291 } 3292 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3293 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3294 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3295 } 3296 3297 /* compute other basis functions for non-symmetric problems */ 3298 if (!pcbddc->symmetric_primal) { 3299 Mat B_V=NULL,B_C=NULL; 3300 PetscScalar *marray; 3301 3302 if (n_constraints) { 3303 Mat S_CCT,C_CRT; 3304 3305 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3306 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3307 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3308 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3309 if (n_vertices) { 3310 Mat S_VCT; 3311 3312 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3313 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3314 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3315 } 3316 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3317 } else { 3318 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3319 } 3320 if (n_vertices && n_R) { 3321 PetscScalar *av,*marray; 3322 const PetscInt *xadj,*adjncy; 3323 PetscInt n; 3324 PetscBool flg_row; 3325 3326 /* B_V = B_V - A_VR^T */ 3327 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3328 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3329 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3330 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3331 for (i=0;i<n;i++) { 3332 PetscInt j; 3333 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3334 } 3335 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3336 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3337 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3338 } 3339 3340 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3341 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3342 for (i=0;i<n_vertices;i++) { 3343 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3344 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3345 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3346 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3347 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3348 } 3349 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3350 if (B_C) { 3351 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3352 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3353 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3354 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3355 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3356 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3357 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3358 } 3359 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3360 } 3361 /* coarse basis functions */ 3362 for (i=0;i<pcbddc->local_primal_size;i++) { 3363 PetscScalar *y; 3364 3365 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3366 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3367 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3368 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3369 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3370 if (i<n_vertices) { 3371 y[n_B*i+idx_V_B[i]] = 1.0; 3372 } 3373 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3374 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3375 3376 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3377 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3378 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3379 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3380 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3381 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3382 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3383 } 3384 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3385 } 3386 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3387 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3388 } 3389 /* free memory */ 3390 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3391 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3392 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3393 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3394 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3395 ierr = PetscFree(work);CHKERRQ(ierr); 3396 if (n_vertices) { 3397 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3398 } 3399 if (n_constraints) { 3400 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3401 } 3402 /* Checking coarse_sub_mat and coarse basis functios */ 3403 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3404 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3405 if (pcbddc->dbg_flag) { 3406 Mat coarse_sub_mat; 3407 Mat AUXMAT,TM1,TM2,TM3,TM4; 3408 Mat coarse_phi_D,coarse_phi_B; 3409 Mat coarse_psi_D,coarse_psi_B; 3410 Mat A_II,A_BB,A_IB,A_BI; 3411 Mat C_B,CPHI; 3412 IS is_dummy; 3413 Vec mones; 3414 MatType checkmattype=MATSEQAIJ; 3415 PetscReal real_value; 3416 3417 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3418 Mat A; 3419 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3420 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3421 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3422 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3423 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3424 ierr = MatDestroy(&A);CHKERRQ(ierr); 3425 } else { 3426 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3427 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3428 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3429 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3430 } 3431 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3432 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3433 if (!pcbddc->symmetric_primal) { 3434 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3435 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3436 } 3437 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3438 3439 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3440 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3441 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3442 if (!pcbddc->symmetric_primal) { 3443 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3444 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3445 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3446 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3447 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3448 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3449 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3450 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3451 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3452 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3453 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3454 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3455 } else { 3456 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3457 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3458 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3459 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3460 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3461 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3462 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3463 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3464 } 3465 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3466 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3467 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3468 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3469 if (pcbddc->benign_n) { 3470 Mat B0_B,B0_BPHI; 3471 PetscScalar *data,*data2; 3472 PetscInt j; 3473 3474 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3475 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3476 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3477 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3478 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3479 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3480 for (j=0;j<pcbddc->benign_n;j++) { 3481 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3482 for (i=0;i<pcbddc->local_primal_size;i++) { 3483 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3484 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3485 } 3486 } 3487 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3488 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3489 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3490 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3491 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3492 } 3493 #if 0 3494 { 3495 PetscViewer viewer; 3496 char filename[256]; 3497 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3498 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3499 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3500 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3501 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3502 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3503 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3504 if (save_change) { 3505 Mat phi_B; 3506 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3507 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3508 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3509 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3510 } else { 3511 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3512 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3513 } 3514 if (pcbddc->coarse_phi_D) { 3515 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3516 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3517 } 3518 if (pcbddc->coarse_psi_B) { 3519 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3520 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3521 } 3522 if (pcbddc->coarse_psi_D) { 3523 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3524 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3525 } 3526 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3527 } 3528 #endif 3529 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3530 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3531 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3532 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3533 3534 /* check constraints */ 3535 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3536 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3537 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3538 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3539 } else { 3540 PetscScalar *data; 3541 Mat tmat; 3542 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3544 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3545 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3546 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3547 } 3548 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3549 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3550 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3551 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3552 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3553 if (!pcbddc->symmetric_primal) { 3554 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3555 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3556 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3557 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3558 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3559 } 3560 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3561 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3562 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3563 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3564 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3565 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3566 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3567 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3568 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3569 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3570 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3571 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3572 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3573 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3574 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3575 if (!pcbddc->symmetric_primal) { 3576 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3577 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3578 } 3579 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3580 } 3581 /* get back data */ 3582 *coarse_submat_vals_n = coarse_submat_vals; 3583 PetscFunctionReturn(0); 3584 } 3585 3586 #undef __FUNCT__ 3587 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3588 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3589 { 3590 Mat *work_mat; 3591 IS isrow_s,iscol_s; 3592 PetscBool rsorted,csorted; 3593 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3594 PetscErrorCode ierr; 3595 3596 PetscFunctionBegin; 3597 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3598 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3599 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3600 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3601 3602 if (!rsorted) { 3603 const PetscInt *idxs; 3604 PetscInt *idxs_sorted,i; 3605 3606 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3607 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3608 for (i=0;i<rsize;i++) { 3609 idxs_perm_r[i] = i; 3610 } 3611 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3612 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3613 for (i=0;i<rsize;i++) { 3614 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3615 } 3616 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3617 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3618 } else { 3619 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3620 isrow_s = isrow; 3621 } 3622 3623 if (!csorted) { 3624 if (isrow == iscol) { 3625 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3626 iscol_s = isrow_s; 3627 } else { 3628 const PetscInt *idxs; 3629 PetscInt *idxs_sorted,i; 3630 3631 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3632 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3633 for (i=0;i<csize;i++) { 3634 idxs_perm_c[i] = i; 3635 } 3636 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3637 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3638 for (i=0;i<csize;i++) { 3639 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3640 } 3641 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3642 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3643 } 3644 } else { 3645 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3646 iscol_s = iscol; 3647 } 3648 3649 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3650 3651 if (!rsorted || !csorted) { 3652 Mat new_mat; 3653 IS is_perm_r,is_perm_c; 3654 3655 if (!rsorted) { 3656 PetscInt *idxs_r,i; 3657 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3658 for (i=0;i<rsize;i++) { 3659 idxs_r[idxs_perm_r[i]] = i; 3660 } 3661 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3662 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3663 } else { 3664 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3665 } 3666 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3667 3668 if (!csorted) { 3669 if (isrow_s == iscol_s) { 3670 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3671 is_perm_c = is_perm_r; 3672 } else { 3673 PetscInt *idxs_c,i; 3674 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3675 for (i=0;i<csize;i++) { 3676 idxs_c[idxs_perm_c[i]] = i; 3677 } 3678 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3679 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3680 } 3681 } else { 3682 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3683 } 3684 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3685 3686 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3687 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3688 work_mat[0] = new_mat; 3689 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3690 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3691 } 3692 3693 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3694 *B = work_mat[0]; 3695 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3696 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3697 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3698 PetscFunctionReturn(0); 3699 } 3700 3701 #undef __FUNCT__ 3702 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3703 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3704 { 3705 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3706 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3707 Mat new_mat; 3708 IS is_local,is_global; 3709 PetscInt local_size; 3710 PetscBool isseqaij; 3711 PetscErrorCode ierr; 3712 3713 PetscFunctionBegin; 3714 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3715 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3716 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3717 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3718 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3719 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3720 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3721 3722 /* check */ 3723 if (pcbddc->dbg_flag) { 3724 Vec x,x_change; 3725 PetscReal error; 3726 3727 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3728 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3729 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3730 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3731 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3732 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3733 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3734 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3735 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3736 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3737 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3738 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3739 ierr = VecDestroy(&x);CHKERRQ(ierr); 3740 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3741 } 3742 3743 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3744 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3745 if (isseqaij) { 3746 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3747 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3748 } else { 3749 Mat work_mat; 3750 3751 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3752 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3753 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3754 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3755 } 3756 if (matis->A->symmetric_set) { 3757 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3758 #if !defined(PETSC_USE_COMPLEX) 3759 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3760 #endif 3761 } 3762 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3763 PetscFunctionReturn(0); 3764 } 3765 3766 #undef __FUNCT__ 3767 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3768 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3769 { 3770 PC_IS* pcis = (PC_IS*)(pc->data); 3771 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3772 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3773 PetscInt *idx_R_local=NULL; 3774 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3775 PetscInt vbs,bs; 3776 PetscBT bitmask=NULL; 3777 PetscErrorCode ierr; 3778 3779 PetscFunctionBegin; 3780 /* 3781 No need to setup local scatters if 3782 - primal space is unchanged 3783 AND 3784 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 3785 AND 3786 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 3787 */ 3788 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 3789 PetscFunctionReturn(0); 3790 } 3791 /* destroy old objects */ 3792 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3793 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3794 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3795 /* Set Non-overlapping dimensions */ 3796 n_B = pcis->n_B; 3797 n_D = pcis->n - n_B; 3798 n_vertices = pcbddc->n_vertices; 3799 3800 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 3801 3802 /* create auxiliary bitmask and allocate workspace */ 3803 if (!sub_schurs || !sub_schurs->reuse_solver) { 3804 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 3805 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 3806 for (i=0;i<n_vertices;i++) { 3807 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 3808 } 3809 3810 for (i=0, n_R=0; i<pcis->n; i++) { 3811 if (!PetscBTLookup(bitmask,i)) { 3812 idx_R_local[n_R++] = i; 3813 } 3814 } 3815 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 3816 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3817 3818 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3819 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 3820 } 3821 3822 /* Block code */ 3823 vbs = 1; 3824 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 3825 if (bs>1 && !(n_vertices%bs)) { 3826 PetscBool is_blocked = PETSC_TRUE; 3827 PetscInt *vary; 3828 if (!sub_schurs || !sub_schurs->reuse_solver) { 3829 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 3830 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 3831 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 3832 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 3833 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 3834 for (i=0; i<pcis->n/bs; i++) { 3835 if (vary[i]!=0 && vary[i]!=bs) { 3836 is_blocked = PETSC_FALSE; 3837 break; 3838 } 3839 } 3840 ierr = PetscFree(vary);CHKERRQ(ierr); 3841 } else { 3842 /* Verify directly the R set */ 3843 for (i=0; i<n_R/bs; i++) { 3844 PetscInt j,node=idx_R_local[bs*i]; 3845 for (j=1; j<bs; j++) { 3846 if (node != idx_R_local[bs*i+j]-j) { 3847 is_blocked = PETSC_FALSE; 3848 break; 3849 } 3850 } 3851 } 3852 } 3853 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 3854 vbs = bs; 3855 for (i=0;i<n_R/vbs;i++) { 3856 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 3857 } 3858 } 3859 } 3860 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 3861 if (sub_schurs && sub_schurs->reuse_solver) { 3862 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3863 3864 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3865 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 3866 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 3867 reuse_solver->is_R = pcbddc->is_R_local; 3868 } else { 3869 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3870 } 3871 3872 /* print some info if requested */ 3873 if (pcbddc->dbg_flag) { 3874 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3875 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3876 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3880 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3881 } 3882 3883 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3884 if (!sub_schurs || !sub_schurs->reuse_solver) { 3885 IS is_aux1,is_aux2; 3886 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3887 3888 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3889 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3890 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3891 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3892 for (i=0; i<n_D; i++) { 3893 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3894 } 3895 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3896 for (i=0, j=0; i<n_R; i++) { 3897 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3898 aux_array1[j++] = i; 3899 } 3900 } 3901 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3902 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3903 for (i=0, j=0; i<n_B; i++) { 3904 if (!PetscBTLookup(bitmask,is_indices[i])) { 3905 aux_array2[j++] = i; 3906 } 3907 } 3908 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3909 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3910 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3911 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3912 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3913 3914 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3915 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3916 for (i=0, j=0; i<n_R; i++) { 3917 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3918 aux_array1[j++] = i; 3919 } 3920 } 3921 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3922 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3923 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3924 } 3925 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3926 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3927 } else { 3928 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3929 IS tis; 3930 PetscInt schur_size; 3931 3932 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3933 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3934 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3935 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3936 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3937 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3938 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3939 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3940 } 3941 } 3942 PetscFunctionReturn(0); 3943 } 3944 3945 3946 #undef __FUNCT__ 3947 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3948 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3949 { 3950 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3951 PC_IS *pcis = (PC_IS*)pc->data; 3952 PC pc_temp; 3953 Mat A_RR; 3954 MatReuse reuse; 3955 PetscScalar m_one = -1.0; 3956 PetscReal value; 3957 PetscInt n_D,n_R; 3958 PetscBool check_corr[2],issbaij; 3959 PetscErrorCode ierr; 3960 /* prefixes stuff */ 3961 char dir_prefix[256],neu_prefix[256],str_level[16]; 3962 size_t len; 3963 3964 PetscFunctionBegin; 3965 3966 /* compute prefixes */ 3967 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3968 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3969 if (!pcbddc->current_level) { 3970 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3971 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3972 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3973 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3974 } else { 3975 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3976 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3977 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3978 len -= 15; /* remove "pc_bddc_coarse_" */ 3979 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3980 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3981 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3982 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3983 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3984 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3985 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3986 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3987 } 3988 3989 /* DIRICHLET PROBLEM */ 3990 if (dirichlet) { 3991 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3992 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3993 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3994 if (pcbddc->dbg_flag) { 3995 Mat A_IIn; 3996 3997 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3998 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3999 pcis->A_II = A_IIn; 4000 } 4001 } 4002 if (pcbddc->local_mat->symmetric_set) { 4003 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4004 } 4005 /* Matrix for Dirichlet problem is pcis->A_II */ 4006 n_D = pcis->n - pcis->n_B; 4007 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4008 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4009 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4010 /* default */ 4011 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4012 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4013 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4014 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4015 if (issbaij) { 4016 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4017 } else { 4018 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4019 } 4020 /* Allow user's customization */ 4021 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4022 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4023 } 4024 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4025 if (sub_schurs && sub_schurs->reuse_solver) { 4026 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4027 4028 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4029 } 4030 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4031 if (!n_D) { 4032 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4033 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4034 } 4035 /* Set Up KSP for Dirichlet problem of BDDC */ 4036 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4037 /* set ksp_D into pcis data */ 4038 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4039 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4040 pcis->ksp_D = pcbddc->ksp_D; 4041 } 4042 4043 /* NEUMANN PROBLEM */ 4044 A_RR = 0; 4045 if (neumann) { 4046 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4047 PetscInt ibs,mbs; 4048 PetscBool issbaij; 4049 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4050 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4051 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4052 if (pcbddc->ksp_R) { /* already created ksp */ 4053 PetscInt nn_R; 4054 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4055 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4056 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4057 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4058 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4059 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4060 reuse = MAT_INITIAL_MATRIX; 4061 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4062 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4063 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4064 reuse = MAT_INITIAL_MATRIX; 4065 } else { /* safe to reuse the matrix */ 4066 reuse = MAT_REUSE_MATRIX; 4067 } 4068 } 4069 /* last check */ 4070 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4071 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4072 reuse = MAT_INITIAL_MATRIX; 4073 } 4074 } else { /* first time, so we need to create the matrix */ 4075 reuse = MAT_INITIAL_MATRIX; 4076 } 4077 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4078 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4079 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4080 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4081 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4082 if (matis->A == pcbddc->local_mat) { 4083 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4084 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4085 } else { 4086 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4087 } 4088 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4089 if (matis->A == pcbddc->local_mat) { 4090 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4091 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4092 } else { 4093 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4094 } 4095 } 4096 /* extract A_RR */ 4097 if (sub_schurs && sub_schurs->reuse_solver) { 4098 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4099 4100 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4101 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4102 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4103 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4104 } else { 4105 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4106 } 4107 } else { 4108 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4109 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4110 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4111 } 4112 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4113 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4114 } 4115 if (pcbddc->local_mat->symmetric_set) { 4116 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4117 } 4118 if (!pcbddc->ksp_R) { /* create object if not present */ 4119 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4120 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4121 /* default */ 4122 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4123 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4124 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4125 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4126 if (issbaij) { 4127 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4128 } else { 4129 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4130 } 4131 /* Allow user's customization */ 4132 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4133 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4134 } 4135 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4136 if (!n_R) { 4137 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4138 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4139 } 4140 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4141 /* Reuse solver if it is present */ 4142 if (sub_schurs && sub_schurs->reuse_solver) { 4143 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4144 4145 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4146 } 4147 /* Set Up KSP for Neumann problem of BDDC */ 4148 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4149 } 4150 4151 if (pcbddc->dbg_flag) { 4152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4153 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4155 } 4156 4157 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4158 check_corr[0] = check_corr[1] = PETSC_FALSE; 4159 if (pcbddc->NullSpace_corr[0]) { 4160 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4161 } 4162 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4163 check_corr[0] = PETSC_TRUE; 4164 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4165 } 4166 if (neumann && pcbddc->NullSpace_corr[2]) { 4167 check_corr[1] = PETSC_TRUE; 4168 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4169 } 4170 4171 /* check Dirichlet and Neumann solvers */ 4172 if (pcbddc->dbg_flag) { 4173 if (dirichlet) { /* Dirichlet */ 4174 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4175 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4176 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4177 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4178 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4180 if (check_corr[0]) { 4181 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4182 } 4183 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4184 } 4185 if (neumann) { /* Neumann */ 4186 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4187 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4188 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4189 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4190 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4191 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4192 if (check_corr[1]) { 4193 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4194 } 4195 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4196 } 4197 } 4198 /* free Neumann problem's matrix */ 4199 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4200 PetscFunctionReturn(0); 4201 } 4202 4203 #undef __FUNCT__ 4204 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4205 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4206 { 4207 PetscErrorCode ierr; 4208 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4209 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4210 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4211 4212 PetscFunctionBegin; 4213 if (!reuse_solver) { 4214 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4215 } 4216 if (!pcbddc->switch_static) { 4217 if (applytranspose && pcbddc->local_auxmat1) { 4218 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4219 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4220 } 4221 if (!reuse_solver) { 4222 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4223 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4224 } else { 4225 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4226 4227 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4228 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4229 } 4230 } else { 4231 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4232 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4233 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4234 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4235 if (applytranspose && pcbddc->local_auxmat1) { 4236 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4237 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4238 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4239 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4240 } 4241 } 4242 if (!reuse_solver || pcbddc->switch_static) { 4243 if (applytranspose) { 4244 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4245 } else { 4246 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4247 } 4248 } else { 4249 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4250 4251 if (applytranspose) { 4252 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4253 } else { 4254 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4255 } 4256 } 4257 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4258 if (!pcbddc->switch_static) { 4259 if (!reuse_solver) { 4260 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4261 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4262 } else { 4263 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4264 4265 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4266 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4267 } 4268 if (!applytranspose && pcbddc->local_auxmat1) { 4269 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4270 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4271 } 4272 } else { 4273 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4274 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4275 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4276 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4277 if (!applytranspose && pcbddc->local_auxmat1) { 4278 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4279 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4280 } 4281 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4282 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4283 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4284 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4285 } 4286 PetscFunctionReturn(0); 4287 } 4288 4289 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4290 #undef __FUNCT__ 4291 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4292 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4293 { 4294 PetscErrorCode ierr; 4295 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4296 PC_IS* pcis = (PC_IS*) (pc->data); 4297 const PetscScalar zero = 0.0; 4298 4299 PetscFunctionBegin; 4300 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4301 if (!pcbddc->benign_apply_coarse_only) { 4302 if (applytranspose) { 4303 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4304 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4305 } else { 4306 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4307 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4308 } 4309 } else { 4310 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4311 } 4312 4313 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4314 if (pcbddc->benign_n) { 4315 PetscScalar *array; 4316 PetscInt j; 4317 4318 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4319 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4320 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4321 } 4322 4323 /* start communications from local primal nodes to rhs of coarse solver */ 4324 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4325 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4326 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4327 4328 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4329 if (pcbddc->coarse_ksp) { 4330 Mat coarse_mat; 4331 Vec rhs,sol; 4332 MatNullSpace nullsp; 4333 PetscBool isbddc = PETSC_FALSE; 4334 4335 if (pcbddc->benign_have_null) { 4336 PC coarse_pc; 4337 4338 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4339 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4340 /* we need to propagate to coarser levels the need for a possible benign correction */ 4341 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4342 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4343 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4344 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4345 } 4346 } 4347 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4348 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4349 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4350 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4351 if (nullsp) { 4352 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4353 } 4354 if (applytranspose) { 4355 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4356 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4357 } else { 4358 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4359 PC coarse_pc; 4360 4361 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4362 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4363 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4364 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4365 } else { 4366 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4367 } 4368 } 4369 /* we don't need the benign correction at coarser levels anymore */ 4370 if (pcbddc->benign_have_null && isbddc) { 4371 PC coarse_pc; 4372 PC_BDDC* coarsepcbddc; 4373 4374 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4375 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4376 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4377 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4378 } 4379 if (nullsp) { 4380 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4381 } 4382 } 4383 4384 /* Local solution on R nodes */ 4385 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4386 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4387 } 4388 /* communications from coarse sol to local primal nodes */ 4389 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4390 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4391 4392 /* Sum contributions from the two levels */ 4393 if (!pcbddc->benign_apply_coarse_only) { 4394 if (applytranspose) { 4395 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4396 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4397 } else { 4398 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4399 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4400 } 4401 /* store p0 */ 4402 if (pcbddc->benign_n) { 4403 PetscScalar *array; 4404 PetscInt j; 4405 4406 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4407 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4408 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4409 } 4410 } else { /* expand the coarse solution */ 4411 if (applytranspose) { 4412 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4413 } else { 4414 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4415 } 4416 } 4417 PetscFunctionReturn(0); 4418 } 4419 4420 #undef __FUNCT__ 4421 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4422 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4423 { 4424 PetscErrorCode ierr; 4425 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4426 PetscScalar *array; 4427 Vec from,to; 4428 4429 PetscFunctionBegin; 4430 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4431 from = pcbddc->coarse_vec; 4432 to = pcbddc->vec1_P; 4433 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4434 Vec tvec; 4435 4436 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4437 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4438 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4439 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4440 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4441 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4442 } 4443 } else { /* from local to global -> put data in coarse right hand side */ 4444 from = pcbddc->vec1_P; 4445 to = pcbddc->coarse_vec; 4446 } 4447 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4448 PetscFunctionReturn(0); 4449 } 4450 4451 #undef __FUNCT__ 4452 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4453 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4454 { 4455 PetscErrorCode ierr; 4456 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4457 PetscScalar *array; 4458 Vec from,to; 4459 4460 PetscFunctionBegin; 4461 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4462 from = pcbddc->coarse_vec; 4463 to = pcbddc->vec1_P; 4464 } else { /* from local to global -> put data in coarse right hand side */ 4465 from = pcbddc->vec1_P; 4466 to = pcbddc->coarse_vec; 4467 } 4468 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4469 if (smode == SCATTER_FORWARD) { 4470 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4471 Vec tvec; 4472 4473 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4474 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4475 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4476 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4477 } 4478 } else { 4479 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4480 ierr = VecResetArray(from);CHKERRQ(ierr); 4481 } 4482 } 4483 PetscFunctionReturn(0); 4484 } 4485 4486 /* uncomment for testing purposes */ 4487 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4488 #undef __FUNCT__ 4489 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4490 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4491 { 4492 PetscErrorCode ierr; 4493 PC_IS* pcis = (PC_IS*)(pc->data); 4494 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4495 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4496 /* one and zero */ 4497 PetscScalar one=1.0,zero=0.0; 4498 /* space to store constraints and their local indices */ 4499 PetscScalar *constraints_data; 4500 PetscInt *constraints_idxs,*constraints_idxs_B; 4501 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4502 PetscInt *constraints_n; 4503 /* iterators */ 4504 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4505 /* BLAS integers */ 4506 PetscBLASInt lwork,lierr; 4507 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4508 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4509 /* reuse */ 4510 PetscInt olocal_primal_size,olocal_primal_size_cc; 4511 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4512 /* change of basis */ 4513 PetscBool qr_needed; 4514 PetscBT change_basis,qr_needed_idx; 4515 /* auxiliary stuff */ 4516 PetscInt *nnz,*is_indices; 4517 PetscInt ncc; 4518 /* some quantities */ 4519 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4520 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4521 4522 PetscFunctionBegin; 4523 /* Destroy Mat objects computed previously */ 4524 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4525 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4526 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4527 /* save info on constraints from previous setup (if any) */ 4528 olocal_primal_size = pcbddc->local_primal_size; 4529 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4530 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4531 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4532 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4533 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4534 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4535 4536 if (!pcbddc->adaptive_selection) { 4537 IS ISForVertices,*ISForFaces,*ISForEdges; 4538 MatNullSpace nearnullsp; 4539 const Vec *nearnullvecs; 4540 Vec *localnearnullsp; 4541 PetscScalar *array; 4542 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4543 PetscBool nnsp_has_cnst; 4544 /* LAPACK working arrays for SVD or POD */ 4545 PetscBool skip_lapack,boolforchange; 4546 PetscScalar *work; 4547 PetscReal *singular_vals; 4548 #if defined(PETSC_USE_COMPLEX) 4549 PetscReal *rwork; 4550 #endif 4551 #if defined(PETSC_MISSING_LAPACK_GESVD) 4552 PetscScalar *temp_basis,*correlation_mat; 4553 #else 4554 PetscBLASInt dummy_int=1; 4555 PetscScalar dummy_scalar=1.; 4556 #endif 4557 4558 /* Get index sets for faces, edges and vertices from graph */ 4559 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4560 /* print some info */ 4561 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4562 PetscInt nv; 4563 4564 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4565 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4566 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4567 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4569 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4571 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4572 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4573 } 4574 4575 /* free unneeded index sets */ 4576 if (!pcbddc->use_vertices) { 4577 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4578 } 4579 if (!pcbddc->use_edges) { 4580 for (i=0;i<n_ISForEdges;i++) { 4581 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4582 } 4583 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4584 n_ISForEdges = 0; 4585 } 4586 if (!pcbddc->use_faces) { 4587 for (i=0;i<n_ISForFaces;i++) { 4588 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4589 } 4590 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4591 n_ISForFaces = 0; 4592 } 4593 4594 /* check if near null space is attached to global mat */ 4595 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4596 if (nearnullsp) { 4597 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4598 /* remove any stored info */ 4599 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4600 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4601 /* store information for BDDC solver reuse */ 4602 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4603 pcbddc->onearnullspace = nearnullsp; 4604 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4605 for (i=0;i<nnsp_size;i++) { 4606 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4607 } 4608 } else { /* if near null space is not provided BDDC uses constants by default */ 4609 nnsp_size = 0; 4610 nnsp_has_cnst = PETSC_TRUE; 4611 } 4612 /* get max number of constraints on a single cc */ 4613 max_constraints = nnsp_size; 4614 if (nnsp_has_cnst) max_constraints++; 4615 4616 /* 4617 Evaluate maximum storage size needed by the procedure 4618 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4619 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4620 There can be multiple constraints per connected component 4621 */ 4622 n_vertices = 0; 4623 if (ISForVertices) { 4624 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4625 } 4626 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4627 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4628 4629 total_counts = n_ISForFaces+n_ISForEdges; 4630 total_counts *= max_constraints; 4631 total_counts += n_vertices; 4632 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4633 4634 total_counts = 0; 4635 max_size_of_constraint = 0; 4636 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4637 IS used_is; 4638 if (i<n_ISForEdges) { 4639 used_is = ISForEdges[i]; 4640 } else { 4641 used_is = ISForFaces[i-n_ISForEdges]; 4642 } 4643 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4644 total_counts += j; 4645 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4646 } 4647 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4648 4649 /* get local part of global near null space vectors */ 4650 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4651 for (k=0;k<nnsp_size;k++) { 4652 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4653 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4654 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4655 } 4656 4657 /* whether or not to skip lapack calls */ 4658 skip_lapack = PETSC_TRUE; 4659 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4660 4661 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4662 if (!skip_lapack) { 4663 PetscScalar temp_work; 4664 4665 #if defined(PETSC_MISSING_LAPACK_GESVD) 4666 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4667 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4668 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4669 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4670 #if defined(PETSC_USE_COMPLEX) 4671 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4672 #endif 4673 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4674 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4675 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4676 lwork = -1; 4677 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4678 #if !defined(PETSC_USE_COMPLEX) 4679 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4680 #else 4681 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4682 #endif 4683 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4684 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4685 #else /* on missing GESVD */ 4686 /* SVD */ 4687 PetscInt max_n,min_n; 4688 max_n = max_size_of_constraint; 4689 min_n = max_constraints; 4690 if (max_size_of_constraint < max_constraints) { 4691 min_n = max_size_of_constraint; 4692 max_n = max_constraints; 4693 } 4694 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4695 #if defined(PETSC_USE_COMPLEX) 4696 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4697 #endif 4698 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4699 lwork = -1; 4700 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4701 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4702 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4703 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4704 #if !defined(PETSC_USE_COMPLEX) 4705 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4706 #else 4707 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4708 #endif 4709 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4710 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4711 #endif /* on missing GESVD */ 4712 /* Allocate optimal workspace */ 4713 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4714 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4715 } 4716 /* Now we can loop on constraining sets */ 4717 total_counts = 0; 4718 constraints_idxs_ptr[0] = 0; 4719 constraints_data_ptr[0] = 0; 4720 /* vertices */ 4721 if (n_vertices) { 4722 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4723 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4724 for (i=0;i<n_vertices;i++) { 4725 constraints_n[total_counts] = 1; 4726 constraints_data[total_counts] = 1.0; 4727 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4728 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4729 total_counts++; 4730 } 4731 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4732 n_vertices = total_counts; 4733 } 4734 4735 /* edges and faces */ 4736 total_counts_cc = total_counts; 4737 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4738 IS used_is; 4739 PetscBool idxs_copied = PETSC_FALSE; 4740 4741 if (ncc<n_ISForEdges) { 4742 used_is = ISForEdges[ncc]; 4743 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4744 } else { 4745 used_is = ISForFaces[ncc-n_ISForEdges]; 4746 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4747 } 4748 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4749 4750 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4751 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4752 /* change of basis should not be performed on local periodic nodes */ 4753 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4754 if (nnsp_has_cnst) { 4755 PetscScalar quad_value; 4756 4757 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4758 idxs_copied = PETSC_TRUE; 4759 4760 if (!pcbddc->use_nnsp_true) { 4761 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4762 } else { 4763 quad_value = 1.0; 4764 } 4765 for (j=0;j<size_of_constraint;j++) { 4766 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4767 } 4768 temp_constraints++; 4769 total_counts++; 4770 } 4771 for (k=0;k<nnsp_size;k++) { 4772 PetscReal real_value; 4773 PetscScalar *ptr_to_data; 4774 4775 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4776 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4777 for (j=0;j<size_of_constraint;j++) { 4778 ptr_to_data[j] = array[is_indices[j]]; 4779 } 4780 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4781 /* check if array is null on the connected component */ 4782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4783 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 4784 if (real_value > 0.0) { /* keep indices and values */ 4785 temp_constraints++; 4786 total_counts++; 4787 if (!idxs_copied) { 4788 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4789 idxs_copied = PETSC_TRUE; 4790 } 4791 } 4792 } 4793 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4794 valid_constraints = temp_constraints; 4795 if (!pcbddc->use_nnsp_true && temp_constraints) { 4796 if (temp_constraints == 1) { /* just normalize the constraint */ 4797 PetscScalar norm,*ptr_to_data; 4798 4799 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4800 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4801 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 4802 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 4803 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 4804 } else { /* perform SVD */ 4805 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 4806 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4807 4808 #if defined(PETSC_MISSING_LAPACK_GESVD) 4809 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 4810 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 4811 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 4812 the constraints basis will differ (by a complex factor with absolute value equal to 1) 4813 from that computed using LAPACKgesvd 4814 -> This is due to a different computation of eigenvectors in LAPACKheev 4815 -> The quality of the POD-computed basis will be the same */ 4816 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 4817 /* Store upper triangular part of correlation matrix */ 4818 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4819 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4820 for (j=0;j<temp_constraints;j++) { 4821 for (k=0;k<j+1;k++) { 4822 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 4823 } 4824 } 4825 /* compute eigenvalues and eigenvectors of correlation matrix */ 4826 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4827 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 4828 #if !defined(PETSC_USE_COMPLEX) 4829 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 4830 #else 4831 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 4832 #endif 4833 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4834 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 4835 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 4836 j = 0; 4837 while (j < temp_constraints && singular_vals[j] < tol) j++; 4838 total_counts = total_counts-j; 4839 valid_constraints = temp_constraints-j; 4840 /* scale and copy POD basis into used quadrature memory */ 4841 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4842 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4843 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 4844 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4845 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 4846 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4847 if (j<temp_constraints) { 4848 PetscInt ii; 4849 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 4850 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4851 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 4852 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4853 for (k=0;k<temp_constraints-j;k++) { 4854 for (ii=0;ii<size_of_constraint;ii++) { 4855 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 4856 } 4857 } 4858 } 4859 #else /* on missing GESVD */ 4860 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4861 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4862 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4863 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4864 #if !defined(PETSC_USE_COMPLEX) 4865 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4866 #else 4867 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4868 #endif 4869 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4870 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4871 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4872 k = temp_constraints; 4873 if (k > size_of_constraint) k = size_of_constraint; 4874 j = 0; 4875 while (j < k && singular_vals[k-j-1] < tol) j++; 4876 valid_constraints = k-j; 4877 total_counts = total_counts-temp_constraints+valid_constraints; 4878 #endif /* on missing GESVD */ 4879 } 4880 } 4881 /* update pointers information */ 4882 if (valid_constraints) { 4883 constraints_n[total_counts_cc] = valid_constraints; 4884 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4885 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4886 /* set change_of_basis flag */ 4887 if (boolforchange) { 4888 PetscBTSet(change_basis,total_counts_cc); 4889 } 4890 total_counts_cc++; 4891 } 4892 } 4893 /* free workspace */ 4894 if (!skip_lapack) { 4895 ierr = PetscFree(work);CHKERRQ(ierr); 4896 #if defined(PETSC_USE_COMPLEX) 4897 ierr = PetscFree(rwork);CHKERRQ(ierr); 4898 #endif 4899 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4900 #if defined(PETSC_MISSING_LAPACK_GESVD) 4901 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4902 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4903 #endif 4904 } 4905 for (k=0;k<nnsp_size;k++) { 4906 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4907 } 4908 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4909 /* free index sets of faces, edges and vertices */ 4910 for (i=0;i<n_ISForFaces;i++) { 4911 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4912 } 4913 if (n_ISForFaces) { 4914 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4915 } 4916 for (i=0;i<n_ISForEdges;i++) { 4917 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4918 } 4919 if (n_ISForEdges) { 4920 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4921 } 4922 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4923 } else { 4924 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4925 4926 total_counts = 0; 4927 n_vertices = 0; 4928 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4929 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4930 } 4931 max_constraints = 0; 4932 total_counts_cc = 0; 4933 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4934 total_counts += pcbddc->adaptive_constraints_n[i]; 4935 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4936 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4937 } 4938 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4939 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4940 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4941 constraints_data = pcbddc->adaptive_constraints_data; 4942 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4943 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4944 total_counts_cc = 0; 4945 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4946 if (pcbddc->adaptive_constraints_n[i]) { 4947 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4948 } 4949 } 4950 #if 0 4951 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4952 for (i=0;i<total_counts_cc;i++) { 4953 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4954 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4955 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4956 printf(" %d",constraints_idxs[j]); 4957 } 4958 printf("\n"); 4959 printf("number of cc: %d\n",constraints_n[i]); 4960 } 4961 for (i=0;i<n_vertices;i++) { 4962 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4963 } 4964 for (i=0;i<sub_schurs->n_subs;i++) { 4965 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4966 } 4967 #endif 4968 4969 max_size_of_constraint = 0; 4970 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4971 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4972 /* Change of basis */ 4973 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4974 if (pcbddc->use_change_of_basis) { 4975 for (i=0;i<sub_schurs->n_subs;i++) { 4976 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4977 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4978 } 4979 } 4980 } 4981 } 4982 pcbddc->local_primal_size = total_counts; 4983 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4984 4985 /* map constraints_idxs in boundary numbering */ 4986 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4987 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4988 4989 /* Create constraint matrix */ 4990 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4991 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4992 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4993 4994 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4995 /* determine if a QR strategy is needed for change of basis */ 4996 qr_needed = PETSC_FALSE; 4997 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4998 total_primal_vertices=0; 4999 pcbddc->local_primal_size_cc = 0; 5000 for (i=0;i<total_counts_cc;i++) { 5001 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5002 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5003 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5004 pcbddc->local_primal_size_cc += 1; 5005 } else if (PetscBTLookup(change_basis,i)) { 5006 for (k=0;k<constraints_n[i];k++) { 5007 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5008 } 5009 pcbddc->local_primal_size_cc += constraints_n[i]; 5010 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5011 PetscBTSet(qr_needed_idx,i); 5012 qr_needed = PETSC_TRUE; 5013 } 5014 } else { 5015 pcbddc->local_primal_size_cc += 1; 5016 } 5017 } 5018 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5019 pcbddc->n_vertices = total_primal_vertices; 5020 /* permute indices in order to have a sorted set of vertices */ 5021 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5022 5023 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5024 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5025 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5026 5027 /* nonzero structure of constraint matrix */ 5028 /* and get reference dof for local constraints */ 5029 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5030 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5031 5032 j = total_primal_vertices; 5033 total_counts = total_primal_vertices; 5034 cum = total_primal_vertices; 5035 for (i=n_vertices;i<total_counts_cc;i++) { 5036 if (!PetscBTLookup(change_basis,i)) { 5037 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5038 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5039 cum++; 5040 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5041 for (k=0;k<constraints_n[i];k++) { 5042 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5043 nnz[j+k] = size_of_constraint; 5044 } 5045 j += constraints_n[i]; 5046 } 5047 } 5048 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5049 ierr = PetscFree(nnz);CHKERRQ(ierr); 5050 5051 /* set values in constraint matrix */ 5052 for (i=0;i<total_primal_vertices;i++) { 5053 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5054 } 5055 total_counts = total_primal_vertices; 5056 for (i=n_vertices;i<total_counts_cc;i++) { 5057 if (!PetscBTLookup(change_basis,i)) { 5058 PetscInt *cols; 5059 5060 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5061 cols = constraints_idxs+constraints_idxs_ptr[i]; 5062 for (k=0;k<constraints_n[i];k++) { 5063 PetscInt row = total_counts+k; 5064 PetscScalar *vals; 5065 5066 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5067 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5068 } 5069 total_counts += constraints_n[i]; 5070 } 5071 } 5072 /* assembling */ 5073 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5074 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5075 5076 /* 5077 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5078 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5079 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5080 */ 5081 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5082 if (pcbddc->use_change_of_basis) { 5083 /* dual and primal dofs on a single cc */ 5084 PetscInt dual_dofs,primal_dofs; 5085 /* working stuff for GEQRF */ 5086 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5087 PetscBLASInt lqr_work; 5088 /* working stuff for UNGQR */ 5089 PetscScalar *gqr_work,lgqr_work_t; 5090 PetscBLASInt lgqr_work; 5091 /* working stuff for TRTRS */ 5092 PetscScalar *trs_rhs; 5093 PetscBLASInt Blas_NRHS; 5094 /* pointers for values insertion into change of basis matrix */ 5095 PetscInt *start_rows,*start_cols; 5096 PetscScalar *start_vals; 5097 /* working stuff for values insertion */ 5098 PetscBT is_primal; 5099 PetscInt *aux_primal_numbering_B; 5100 /* matrix sizes */ 5101 PetscInt global_size,local_size; 5102 /* temporary change of basis */ 5103 Mat localChangeOfBasisMatrix; 5104 /* extra space for debugging */ 5105 PetscScalar *dbg_work; 5106 5107 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5108 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5109 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5110 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5111 /* nonzeros for local mat */ 5112 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5113 if (!pcbddc->benign_change || pcbddc->fake_change) { 5114 for (i=0;i<pcis->n;i++) nnz[i]=1; 5115 } else { 5116 const PetscInt *ii; 5117 PetscInt n; 5118 PetscBool flg_row; 5119 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5120 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5121 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5122 } 5123 for (i=n_vertices;i<total_counts_cc;i++) { 5124 if (PetscBTLookup(change_basis,i)) { 5125 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5126 if (PetscBTLookup(qr_needed_idx,i)) { 5127 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5128 } else { 5129 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5130 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5131 } 5132 } 5133 } 5134 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5135 ierr = PetscFree(nnz);CHKERRQ(ierr); 5136 /* Set interior change in the matrix */ 5137 if (!pcbddc->benign_change || pcbddc->fake_change) { 5138 for (i=0;i<pcis->n;i++) { 5139 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5140 } 5141 } else { 5142 const PetscInt *ii,*jj; 5143 PetscScalar *aa; 5144 PetscInt n; 5145 PetscBool flg_row; 5146 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5147 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5148 for (i=0;i<n;i++) { 5149 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5150 } 5151 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5152 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5153 } 5154 5155 if (pcbddc->dbg_flag) { 5156 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5157 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5158 } 5159 5160 5161 /* Now we loop on the constraints which need a change of basis */ 5162 /* 5163 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5164 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5165 5166 Basic blocks of change of basis matrix T computed by 5167 5168 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5169 5170 | 1 0 ... 0 s_1/S | 5171 | 0 1 ... 0 s_2/S | 5172 | ... | 5173 | 0 ... 1 s_{n-1}/S | 5174 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5175 5176 with S = \sum_{i=1}^n s_i^2 5177 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5178 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5179 5180 - QR decomposition of constraints otherwise 5181 */ 5182 if (qr_needed) { 5183 /* space to store Q */ 5184 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5185 /* array to store scaling factors for reflectors */ 5186 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5187 /* first we issue queries for optimal work */ 5188 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5189 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5190 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5191 lqr_work = -1; 5192 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5193 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5194 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5195 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5196 lgqr_work = -1; 5197 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5198 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5199 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5200 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5201 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5202 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5203 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5204 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5205 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5206 /* array to store rhs and solution of triangular solver */ 5207 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5208 /* allocating workspace for check */ 5209 if (pcbddc->dbg_flag) { 5210 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5211 } 5212 } 5213 /* array to store whether a node is primal or not */ 5214 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5215 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5216 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5217 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5218 for (i=0;i<total_primal_vertices;i++) { 5219 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5220 } 5221 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5222 5223 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5224 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5225 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5226 if (PetscBTLookup(change_basis,total_counts)) { 5227 /* get constraint info */ 5228 primal_dofs = constraints_n[total_counts]; 5229 dual_dofs = size_of_constraint-primal_dofs; 5230 5231 if (pcbddc->dbg_flag) { 5232 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5233 } 5234 5235 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5236 5237 /* copy quadrature constraints for change of basis check */ 5238 if (pcbddc->dbg_flag) { 5239 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5240 } 5241 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5242 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5243 5244 /* compute QR decomposition of constraints */ 5245 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5246 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5247 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5248 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5249 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5250 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5251 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5252 5253 /* explictly compute R^-T */ 5254 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5255 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5256 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5257 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5258 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5259 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5260 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5261 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5262 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5263 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5264 5265 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5267 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5268 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5269 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5270 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5271 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5272 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5273 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5274 5275 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5276 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5277 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5278 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5279 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5280 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5281 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5282 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5283 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5284 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5285 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5286 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5287 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5288 5289 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5290 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5291 /* insert cols for primal dofs */ 5292 for (j=0;j<primal_dofs;j++) { 5293 start_vals = &qr_basis[j*size_of_constraint]; 5294 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5295 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5296 } 5297 /* insert cols for dual dofs */ 5298 for (j=0,k=0;j<dual_dofs;k++) { 5299 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5300 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5301 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5302 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5303 j++; 5304 } 5305 } 5306 5307 /* check change of basis */ 5308 if (pcbddc->dbg_flag) { 5309 PetscInt ii,jj; 5310 PetscBool valid_qr=PETSC_TRUE; 5311 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5312 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5313 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5314 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5315 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5316 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5317 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5318 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5319 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5320 for (jj=0;jj<size_of_constraint;jj++) { 5321 for (ii=0;ii<primal_dofs;ii++) { 5322 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5323 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5324 } 5325 } 5326 if (!valid_qr) { 5327 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5328 for (jj=0;jj<size_of_constraint;jj++) { 5329 for (ii=0;ii<primal_dofs;ii++) { 5330 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5331 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5332 } 5333 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5334 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5335 } 5336 } 5337 } 5338 } else { 5339 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5340 } 5341 } 5342 } else { /* simple transformation block */ 5343 PetscInt row,col; 5344 PetscScalar val,norm; 5345 5346 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5347 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5348 for (j=0;j<size_of_constraint;j++) { 5349 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5350 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5351 if (!PetscBTLookup(is_primal,row_B)) { 5352 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5353 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5354 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5355 } else { 5356 for (k=0;k<size_of_constraint;k++) { 5357 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5358 if (row != col) { 5359 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5360 } else { 5361 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5362 } 5363 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5364 } 5365 } 5366 } 5367 if (pcbddc->dbg_flag) { 5368 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5369 } 5370 } 5371 } else { 5372 if (pcbddc->dbg_flag) { 5373 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5374 } 5375 } 5376 } 5377 5378 /* free workspace */ 5379 if (qr_needed) { 5380 if (pcbddc->dbg_flag) { 5381 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5382 } 5383 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5384 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5385 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5386 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5387 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5388 } 5389 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5390 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5391 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5392 5393 /* assembling of global change of variable */ 5394 if (!pcbddc->fake_change) { 5395 Mat tmat; 5396 PetscInt bs; 5397 5398 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5399 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5400 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5401 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5402 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5403 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5404 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5405 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5406 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5407 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5408 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5409 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5410 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5411 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5412 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5413 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5414 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5415 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5416 5417 /* check */ 5418 if (pcbddc->dbg_flag) { 5419 PetscReal error; 5420 Vec x,x_change; 5421 5422 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5423 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5424 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5425 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5426 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5427 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5428 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5429 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5430 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5431 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5432 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5433 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5434 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5435 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5436 ierr = VecDestroy(&x);CHKERRQ(ierr); 5437 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5438 } 5439 /* adapt sub_schurs computed (if any) */ 5440 if (pcbddc->use_deluxe_scaling) { 5441 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5442 5443 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5444 if (sub_schurs && sub_schurs->S_Ej_all) { 5445 Mat S_new,tmat; 5446 IS is_all_N,is_V_Sall = NULL; 5447 5448 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5449 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5450 if (pcbddc->deluxe_zerorows) { 5451 ISLocalToGlobalMapping NtoSall; 5452 IS is_V; 5453 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5454 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5455 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5456 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5457 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5458 } 5459 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5460 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5461 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5462 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5463 if (pcbddc->deluxe_zerorows) { 5464 const PetscScalar *array; 5465 const PetscInt *idxs_V,*idxs_all; 5466 PetscInt i,n_V; 5467 5468 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5469 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5470 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5471 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5472 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5473 for (i=0;i<n_V;i++) { 5474 PetscScalar val; 5475 PetscInt idx; 5476 5477 idx = idxs_V[i]; 5478 val = array[idxs_all[idxs_V[i]]]; 5479 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5480 } 5481 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5482 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5483 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5484 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5485 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5486 } 5487 sub_schurs->S_Ej_all = S_new; 5488 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5489 if (sub_schurs->sum_S_Ej_all) { 5490 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5491 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5492 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5493 if (pcbddc->deluxe_zerorows) { 5494 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5495 } 5496 sub_schurs->sum_S_Ej_all = S_new; 5497 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5498 } 5499 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5500 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5501 } 5502 /* destroy any change of basis context in sub_schurs */ 5503 if (sub_schurs && sub_schurs->change) { 5504 PetscInt i; 5505 5506 for (i=0;i<sub_schurs->n_subs;i++) { 5507 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5508 } 5509 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5510 } 5511 } 5512 if (pcbddc->switch_static) { /* need to save the local change */ 5513 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5514 } else { 5515 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5516 } 5517 /* determine if any process has changed the pressures locally */ 5518 pcbddc->change_interior = pcbddc->benign_have_null; 5519 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5520 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5521 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5522 pcbddc->use_qr_single = qr_needed; 5523 } 5524 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5525 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5526 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5527 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5528 } else { 5529 Mat benign_global = NULL; 5530 if (pcbddc->benign_have_null) { 5531 Mat tmat; 5532 5533 pcbddc->change_interior = PETSC_TRUE; 5534 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5535 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5536 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5537 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5538 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5539 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5540 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5541 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5542 if (pcbddc->benign_change) { 5543 Mat M; 5544 5545 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5546 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5547 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5548 ierr = MatDestroy(&M);CHKERRQ(ierr); 5549 } else { 5550 Mat eye; 5551 PetscScalar *array; 5552 5553 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5554 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5555 for (i=0;i<pcis->n;i++) { 5556 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5557 } 5558 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5559 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5560 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5561 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5562 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5563 } 5564 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5565 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5566 } 5567 if (pcbddc->user_ChangeOfBasisMatrix) { 5568 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5569 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5570 } else if (pcbddc->benign_have_null) { 5571 pcbddc->ChangeOfBasisMatrix = benign_global; 5572 } 5573 } 5574 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5575 IS is_global; 5576 const PetscInt *gidxs; 5577 5578 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5579 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5580 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5581 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5582 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5583 } 5584 } 5585 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5586 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5587 } 5588 5589 if (!pcbddc->fake_change) { 5590 /* add pressure dofs to set of primal nodes for numbering purposes */ 5591 for (i=0;i<pcbddc->benign_n;i++) { 5592 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5593 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5594 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5595 pcbddc->local_primal_size_cc++; 5596 pcbddc->local_primal_size++; 5597 } 5598 5599 /* check if a new primal space has been introduced (also take into account benign trick) */ 5600 pcbddc->new_primal_space_local = PETSC_TRUE; 5601 if (olocal_primal_size == pcbddc->local_primal_size) { 5602 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5603 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5604 if (!pcbddc->new_primal_space_local) { 5605 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5606 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5607 } 5608 } 5609 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5610 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5611 } 5612 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5613 5614 /* flush dbg viewer */ 5615 if (pcbddc->dbg_flag) { 5616 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5617 } 5618 5619 /* free workspace */ 5620 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5621 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5622 if (!pcbddc->adaptive_selection) { 5623 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5624 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5625 } else { 5626 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5627 pcbddc->adaptive_constraints_idxs_ptr, 5628 pcbddc->adaptive_constraints_data_ptr, 5629 pcbddc->adaptive_constraints_idxs, 5630 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5631 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5632 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5633 } 5634 PetscFunctionReturn(0); 5635 } 5636 5637 #undef __FUNCT__ 5638 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5639 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5640 { 5641 ISLocalToGlobalMapping map; 5642 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5643 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5644 PetscInt ierr,i,N; 5645 5646 PetscFunctionBegin; 5647 if (pcbddc->graphanalyzed && !pcbddc->recompute_topography) PetscFunctionReturn(0); 5648 /* Reset previously computed graph */ 5649 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5650 /* Init local Graph struct */ 5651 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5652 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5653 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5654 5655 /* Check validity of the csr graph passed in by the user */ 5656 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5657 5658 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5659 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5660 PetscInt *xadj,*adjncy; 5661 PetscInt nvtxs; 5662 PetscBool flg_row=PETSC_FALSE; 5663 5664 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5665 if (flg_row) { 5666 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5667 pcbddc->computed_rowadj = PETSC_TRUE; 5668 } 5669 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5670 } 5671 if (pcbddc->dbg_flag) { 5672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5673 } 5674 5675 /* Setup of Graph */ 5676 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5677 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5678 5679 /* attach info on disconnected subdomains if present */ 5680 if (pcbddc->n_local_subs) { 5681 PetscInt *local_subs; 5682 5683 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5684 for (i=0;i<pcbddc->n_local_subs;i++) { 5685 const PetscInt *idxs; 5686 PetscInt nl,j; 5687 5688 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5689 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5690 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5691 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5692 } 5693 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5694 pcbddc->mat_graph->local_subs = local_subs; 5695 } 5696 5697 /* Graph's connected components analysis */ 5698 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5699 5700 /* set flag indicating analysis has been done */ 5701 pcbddc->graphanalyzed = PETSC_TRUE; 5702 PetscFunctionReturn(0); 5703 } 5704 5705 #undef __FUNCT__ 5706 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5707 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5708 { 5709 PetscInt i,j; 5710 PetscScalar *alphas; 5711 PetscErrorCode ierr; 5712 5713 PetscFunctionBegin; 5714 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5715 for (i=0;i<n;i++) { 5716 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5717 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5718 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5719 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5720 } 5721 ierr = PetscFree(alphas);CHKERRQ(ierr); 5722 PetscFunctionReturn(0); 5723 } 5724 5725 #undef __FUNCT__ 5726 #define __FUNCT__ "MatISGetSubassemblingPattern" 5727 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5728 { 5729 Mat A; 5730 PetscInt n_neighs,*neighs,*n_shared,**shared; 5731 PetscMPIInt size,rank,color; 5732 PetscInt *xadj,*adjncy; 5733 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5734 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5735 PetscInt void_procs,*procs_candidates = NULL; 5736 PetscInt xadj_count, *count; 5737 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5738 PetscSubcomm psubcomm; 5739 MPI_Comm subcomm; 5740 PetscErrorCode ierr; 5741 5742 PetscFunctionBegin; 5743 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5744 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5745 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5746 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5747 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5748 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5749 5750 if (have_void) *have_void = PETSC_FALSE; 5751 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5752 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5753 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5754 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5755 im_active = !!(n); 5756 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5757 void_procs = size - active_procs; 5758 /* get ranks of of non-active processes in mat communicator */ 5759 if (void_procs) { 5760 PetscInt ncand; 5761 5762 if (have_void) *have_void = PETSC_TRUE; 5763 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5764 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5765 for (i=0,ncand=0;i<size;i++) { 5766 if (!procs_candidates[i]) { 5767 procs_candidates[ncand++] = i; 5768 } 5769 } 5770 /* force n_subdomains to be not greater that the number of non-active processes */ 5771 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5772 } 5773 5774 /* number of subdomains requested greater than active processes -> just shift the matrix 5775 number of subdomains requested 1 -> send to master or first candidate in voids */ 5776 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5777 PetscInt issize,isidx,dest; 5778 if (*n_subdomains == 1) dest = 0; 5779 else dest = rank; 5780 if (im_active) { 5781 issize = 1; 5782 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5783 isidx = procs_candidates[dest]; 5784 } else { 5785 isidx = dest; 5786 } 5787 } else { 5788 issize = 0; 5789 isidx = -1; 5790 } 5791 *n_subdomains = active_procs; 5792 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5793 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5794 PetscFunctionReturn(0); 5795 } 5796 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5797 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5798 threshold = PetscMax(threshold,2); 5799 5800 /* Get info on mapping */ 5801 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5802 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5803 5804 /* build local CSR graph of subdomains' connectivity */ 5805 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5806 xadj[0] = 0; 5807 xadj[1] = PetscMax(n_neighs-1,0); 5808 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5809 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5810 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5811 for (i=1;i<n_neighs;i++) 5812 for (j=0;j<n_shared[i];j++) 5813 count[shared[i][j]] += 1; 5814 5815 xadj_count = 0; 5816 for (i=1;i<n_neighs;i++) { 5817 for (j=0;j<n_shared[i];j++) { 5818 if (count[shared[i][j]] < threshold) { 5819 adjncy[xadj_count] = neighs[i]; 5820 adjncy_wgt[xadj_count] = n_shared[i]; 5821 xadj_count++; 5822 break; 5823 } 5824 } 5825 } 5826 xadj[1] = xadj_count; 5827 ierr = PetscFree(count);CHKERRQ(ierr); 5828 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5829 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5830 5831 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5832 5833 /* Restrict work on active processes only */ 5834 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5835 if (void_procs) { 5836 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5837 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5838 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5839 subcomm = PetscSubcommChild(psubcomm); 5840 } else { 5841 psubcomm = NULL; 5842 subcomm = PetscObjectComm((PetscObject)mat); 5843 } 5844 5845 v_wgt = NULL; 5846 if (!color) { 5847 ierr = PetscFree(xadj);CHKERRQ(ierr); 5848 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5849 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5850 } else { 5851 Mat subdomain_adj; 5852 IS new_ranks,new_ranks_contig; 5853 MatPartitioning partitioner; 5854 PetscInt rstart=0,rend=0; 5855 PetscInt *is_indices,*oldranks; 5856 PetscMPIInt size; 5857 PetscBool aggregate; 5858 5859 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5860 if (void_procs) { 5861 PetscInt prank = rank; 5862 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5863 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5864 for (i=0;i<xadj[1];i++) { 5865 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5866 } 5867 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5868 } else { 5869 oldranks = NULL; 5870 } 5871 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5872 if (aggregate) { /* TODO: all this part could be made more efficient */ 5873 PetscInt lrows,row,ncols,*cols; 5874 PetscMPIInt nrank; 5875 PetscScalar *vals; 5876 5877 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5878 lrows = 0; 5879 if (nrank<redprocs) { 5880 lrows = size/redprocs; 5881 if (nrank<size%redprocs) lrows++; 5882 } 5883 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5884 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5885 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5886 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5887 row = nrank; 5888 ncols = xadj[1]-xadj[0]; 5889 cols = adjncy; 5890 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5891 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5892 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5893 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5894 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5895 ierr = PetscFree(xadj);CHKERRQ(ierr); 5896 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5897 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5898 ierr = PetscFree(vals);CHKERRQ(ierr); 5899 if (use_vwgt) { 5900 Vec v; 5901 const PetscScalar *array; 5902 PetscInt nl; 5903 5904 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5905 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5906 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5907 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5908 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5909 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5910 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5911 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5912 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5913 ierr = VecDestroy(&v);CHKERRQ(ierr); 5914 } 5915 } else { 5916 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5917 if (use_vwgt) { 5918 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5919 v_wgt[0] = local_size; 5920 } 5921 } 5922 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5923 5924 /* Partition */ 5925 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5926 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5927 if (v_wgt) { 5928 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5929 } 5930 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5931 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5932 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5933 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5934 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5935 5936 /* renumber new_ranks to avoid "holes" in new set of processors */ 5937 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5938 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5939 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5940 if (!aggregate) { 5941 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5942 #if defined(PETSC_USE_DEBUG) 5943 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5944 #endif 5945 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5946 } else if (oldranks) { 5947 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5948 } else { 5949 ranks_send_to_idx[0] = is_indices[0]; 5950 } 5951 } else { 5952 PetscInt idxs[1]; 5953 PetscMPIInt tag; 5954 MPI_Request *reqs; 5955 5956 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5957 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5958 for (i=rstart;i<rend;i++) { 5959 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5960 } 5961 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5962 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5963 ierr = PetscFree(reqs);CHKERRQ(ierr); 5964 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5965 #if defined(PETSC_USE_DEBUG) 5966 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5967 #endif 5968 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5969 } else if (oldranks) { 5970 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5971 } else { 5972 ranks_send_to_idx[0] = idxs[0]; 5973 } 5974 } 5975 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5976 /* clean up */ 5977 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5978 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5979 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5980 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5981 } 5982 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5983 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5984 5985 /* assemble parallel IS for sends */ 5986 i = 1; 5987 if (!color) i=0; 5988 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5989 PetscFunctionReturn(0); 5990 } 5991 5992 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5993 5994 #undef __FUNCT__ 5995 #define __FUNCT__ "MatISSubassemble" 5996 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5997 { 5998 Mat local_mat; 5999 IS is_sends_internal; 6000 PetscInt rows,cols,new_local_rows; 6001 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6002 PetscBool ismatis,isdense,newisdense,destroy_mat; 6003 ISLocalToGlobalMapping l2gmap; 6004 PetscInt* l2gmap_indices; 6005 const PetscInt* is_indices; 6006 MatType new_local_type; 6007 /* buffers */ 6008 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6009 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6010 PetscInt *recv_buffer_idxs_local; 6011 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6012 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6013 /* MPI */ 6014 MPI_Comm comm,comm_n; 6015 PetscSubcomm subcomm; 6016 PetscMPIInt n_sends,n_recvs,commsize; 6017 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6018 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6019 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6020 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6021 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6022 PetscErrorCode ierr; 6023 6024 PetscFunctionBegin; 6025 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6026 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6027 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6028 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6029 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6030 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6031 PetscValidLogicalCollectiveBool(mat,reuse,6); 6032 PetscValidLogicalCollectiveInt(mat,nis,8); 6033 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6034 if (nvecs) { 6035 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6036 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6037 } 6038 /* further checks */ 6039 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6040 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6041 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6042 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6043 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6044 if (reuse && *mat_n) { 6045 PetscInt mrows,mcols,mnrows,mncols; 6046 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6047 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6048 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6049 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6050 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6051 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6052 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6053 } 6054 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6055 PetscValidLogicalCollectiveInt(mat,bs,0); 6056 6057 /* prepare IS for sending if not provided */ 6058 if (!is_sends) { 6059 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6060 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6061 } else { 6062 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6063 is_sends_internal = is_sends; 6064 } 6065 6066 /* get comm */ 6067 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6068 6069 /* compute number of sends */ 6070 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6071 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6072 6073 /* compute number of receives */ 6074 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6075 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6076 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6077 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6078 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6079 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6080 ierr = PetscFree(iflags);CHKERRQ(ierr); 6081 6082 /* restrict comm if requested */ 6083 subcomm = 0; 6084 destroy_mat = PETSC_FALSE; 6085 if (restrict_comm) { 6086 PetscMPIInt color,subcommsize; 6087 6088 color = 0; 6089 if (restrict_full) { 6090 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6091 } else { 6092 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6093 } 6094 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6095 subcommsize = commsize - subcommsize; 6096 /* check if reuse has been requested */ 6097 if (reuse) { 6098 if (*mat_n) { 6099 PetscMPIInt subcommsize2; 6100 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6101 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6102 comm_n = PetscObjectComm((PetscObject)*mat_n); 6103 } else { 6104 comm_n = PETSC_COMM_SELF; 6105 } 6106 } else { /* MAT_INITIAL_MATRIX */ 6107 PetscMPIInt rank; 6108 6109 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6110 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6111 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6112 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6113 comm_n = PetscSubcommChild(subcomm); 6114 } 6115 /* flag to destroy *mat_n if not significative */ 6116 if (color) destroy_mat = PETSC_TRUE; 6117 } else { 6118 comm_n = comm; 6119 } 6120 6121 /* prepare send/receive buffers */ 6122 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6123 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6124 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6125 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6126 if (nis) { 6127 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6128 } 6129 6130 /* Get data from local matrices */ 6131 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6132 /* TODO: See below some guidelines on how to prepare the local buffers */ 6133 /* 6134 send_buffer_vals should contain the raw values of the local matrix 6135 send_buffer_idxs should contain: 6136 - MatType_PRIVATE type 6137 - PetscInt size_of_l2gmap 6138 - PetscInt global_row_indices[size_of_l2gmap] 6139 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6140 */ 6141 else { 6142 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6143 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6144 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6145 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6146 send_buffer_idxs[1] = i; 6147 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6148 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6149 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6150 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6151 for (i=0;i<n_sends;i++) { 6152 ilengths_vals[is_indices[i]] = len*len; 6153 ilengths_idxs[is_indices[i]] = len+2; 6154 } 6155 } 6156 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6157 /* additional is (if any) */ 6158 if (nis) { 6159 PetscMPIInt psum; 6160 PetscInt j; 6161 for (j=0,psum=0;j<nis;j++) { 6162 PetscInt plen; 6163 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6164 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6165 psum += len+1; /* indices + lenght */ 6166 } 6167 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6168 for (j=0,psum=0;j<nis;j++) { 6169 PetscInt plen; 6170 const PetscInt *is_array_idxs; 6171 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6172 send_buffer_idxs_is[psum] = plen; 6173 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6174 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6175 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6176 psum += plen+1; /* indices + lenght */ 6177 } 6178 for (i=0;i<n_sends;i++) { 6179 ilengths_idxs_is[is_indices[i]] = psum; 6180 } 6181 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6182 } 6183 6184 buf_size_idxs = 0; 6185 buf_size_vals = 0; 6186 buf_size_idxs_is = 0; 6187 buf_size_vecs = 0; 6188 for (i=0;i<n_recvs;i++) { 6189 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6190 buf_size_vals += (PetscInt)olengths_vals[i]; 6191 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6192 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6193 } 6194 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6195 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6196 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6197 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6198 6199 /* get new tags for clean communications */ 6200 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6201 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6202 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6203 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6204 6205 /* allocate for requests */ 6206 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6207 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6208 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6209 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6210 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6211 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6212 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6213 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6214 6215 /* communications */ 6216 ptr_idxs = recv_buffer_idxs; 6217 ptr_vals = recv_buffer_vals; 6218 ptr_idxs_is = recv_buffer_idxs_is; 6219 ptr_vecs = recv_buffer_vecs; 6220 for (i=0;i<n_recvs;i++) { 6221 source_dest = onodes[i]; 6222 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6223 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6224 ptr_idxs += olengths_idxs[i]; 6225 ptr_vals += olengths_vals[i]; 6226 if (nis) { 6227 source_dest = onodes_is[i]; 6228 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6229 ptr_idxs_is += olengths_idxs_is[i]; 6230 } 6231 if (nvecs) { 6232 source_dest = onodes[i]; 6233 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6234 ptr_vecs += olengths_idxs[i]-2; 6235 } 6236 } 6237 for (i=0;i<n_sends;i++) { 6238 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6239 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6240 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6241 if (nis) { 6242 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6243 } 6244 if (nvecs) { 6245 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6246 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6247 } 6248 } 6249 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6250 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6251 6252 /* assemble new l2g map */ 6253 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6254 ptr_idxs = recv_buffer_idxs; 6255 new_local_rows = 0; 6256 for (i=0;i<n_recvs;i++) { 6257 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6258 ptr_idxs += olengths_idxs[i]; 6259 } 6260 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6261 ptr_idxs = recv_buffer_idxs; 6262 new_local_rows = 0; 6263 for (i=0;i<n_recvs;i++) { 6264 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6265 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6266 ptr_idxs += olengths_idxs[i]; 6267 } 6268 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6269 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6270 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6271 6272 /* infer new local matrix type from received local matrices type */ 6273 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6274 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6275 if (n_recvs) { 6276 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6277 ptr_idxs = recv_buffer_idxs; 6278 for (i=0;i<n_recvs;i++) { 6279 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6280 new_local_type_private = MATAIJ_PRIVATE; 6281 break; 6282 } 6283 ptr_idxs += olengths_idxs[i]; 6284 } 6285 switch (new_local_type_private) { 6286 case MATDENSE_PRIVATE: 6287 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6288 new_local_type = MATSEQAIJ; 6289 bs = 1; 6290 } else { /* if I receive only 1 dense matrix */ 6291 new_local_type = MATSEQDENSE; 6292 bs = 1; 6293 } 6294 break; 6295 case MATAIJ_PRIVATE: 6296 new_local_type = MATSEQAIJ; 6297 bs = 1; 6298 break; 6299 case MATBAIJ_PRIVATE: 6300 new_local_type = MATSEQBAIJ; 6301 break; 6302 case MATSBAIJ_PRIVATE: 6303 new_local_type = MATSEQSBAIJ; 6304 break; 6305 default: 6306 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6307 break; 6308 } 6309 } else { /* by default, new_local_type is seqdense */ 6310 new_local_type = MATSEQDENSE; 6311 bs = 1; 6312 } 6313 6314 /* create MATIS object if needed */ 6315 if (!reuse) { 6316 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6317 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6318 } else { 6319 /* it also destroys the local matrices */ 6320 if (*mat_n) { 6321 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6322 } else { /* this is a fake object */ 6323 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6324 } 6325 } 6326 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6327 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6328 6329 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6330 6331 /* Global to local map of received indices */ 6332 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6333 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6334 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6335 6336 /* restore attributes -> type of incoming data and its size */ 6337 buf_size_idxs = 0; 6338 for (i=0;i<n_recvs;i++) { 6339 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6340 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6341 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6342 } 6343 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6344 6345 /* set preallocation */ 6346 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6347 if (!newisdense) { 6348 PetscInt *new_local_nnz=0; 6349 6350 ptr_idxs = recv_buffer_idxs_local; 6351 if (n_recvs) { 6352 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6353 } 6354 for (i=0;i<n_recvs;i++) { 6355 PetscInt j; 6356 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6357 for (j=0;j<*(ptr_idxs+1);j++) { 6358 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6359 } 6360 } else { 6361 /* TODO */ 6362 } 6363 ptr_idxs += olengths_idxs[i]; 6364 } 6365 if (new_local_nnz) { 6366 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6367 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6368 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6369 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6370 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6371 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6372 } else { 6373 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6374 } 6375 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6376 } else { 6377 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6378 } 6379 6380 /* set values */ 6381 ptr_vals = recv_buffer_vals; 6382 ptr_idxs = recv_buffer_idxs_local; 6383 for (i=0;i<n_recvs;i++) { 6384 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6385 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6386 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6387 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6388 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6389 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6390 } else { 6391 /* TODO */ 6392 } 6393 ptr_idxs += olengths_idxs[i]; 6394 ptr_vals += olengths_vals[i]; 6395 } 6396 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6397 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6398 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6399 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6400 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6401 6402 #if 0 6403 if (!restrict_comm) { /* check */ 6404 Vec lvec,rvec; 6405 PetscReal infty_error; 6406 6407 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6408 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6409 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6410 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6411 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6412 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6413 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6414 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6415 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6416 } 6417 #endif 6418 6419 /* assemble new additional is (if any) */ 6420 if (nis) { 6421 PetscInt **temp_idxs,*count_is,j,psum; 6422 6423 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6424 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6425 ptr_idxs = recv_buffer_idxs_is; 6426 psum = 0; 6427 for (i=0;i<n_recvs;i++) { 6428 for (j=0;j<nis;j++) { 6429 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6430 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6431 psum += plen; 6432 ptr_idxs += plen+1; /* shift pointer to received data */ 6433 } 6434 } 6435 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6436 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6437 for (i=1;i<nis;i++) { 6438 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6439 } 6440 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6441 ptr_idxs = recv_buffer_idxs_is; 6442 for (i=0;i<n_recvs;i++) { 6443 for (j=0;j<nis;j++) { 6444 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6445 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6446 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6447 ptr_idxs += plen+1; /* shift pointer to received data */ 6448 } 6449 } 6450 for (i=0;i<nis;i++) { 6451 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6452 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6453 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6454 } 6455 ierr = PetscFree(count_is);CHKERRQ(ierr); 6456 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6457 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6458 } 6459 /* free workspace */ 6460 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6461 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6462 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6463 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6464 if (isdense) { 6465 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6466 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6467 } else { 6468 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6469 } 6470 if (nis) { 6471 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6472 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6473 } 6474 6475 if (nvecs) { 6476 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6477 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6478 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6479 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6480 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6481 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6482 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6483 /* set values */ 6484 ptr_vals = recv_buffer_vecs; 6485 ptr_idxs = recv_buffer_idxs_local; 6486 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6487 for (i=0;i<n_recvs;i++) { 6488 PetscInt j; 6489 for (j=0;j<*(ptr_idxs+1);j++) { 6490 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6491 } 6492 ptr_idxs += olengths_idxs[i]; 6493 ptr_vals += olengths_idxs[i]-2; 6494 } 6495 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6496 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6497 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6498 } 6499 6500 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6501 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6502 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6503 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6504 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6505 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6506 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6507 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6508 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6509 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6510 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6511 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6512 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6513 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6514 ierr = PetscFree(onodes);CHKERRQ(ierr); 6515 if (nis) { 6516 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6517 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6518 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6519 } 6520 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6521 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6522 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6523 for (i=0;i<nis;i++) { 6524 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6525 } 6526 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6527 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6528 } 6529 *mat_n = NULL; 6530 } 6531 PetscFunctionReturn(0); 6532 } 6533 6534 /* temporary hack into ksp private data structure */ 6535 #include <petsc/private/kspimpl.h> 6536 6537 #undef __FUNCT__ 6538 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6539 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6540 { 6541 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6542 PC_IS *pcis = (PC_IS*)pc->data; 6543 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6544 Mat coarsedivudotp = NULL; 6545 MatNullSpace CoarseNullSpace = NULL; 6546 ISLocalToGlobalMapping coarse_islg; 6547 IS coarse_is,*isarray; 6548 PetscInt i,im_active=-1,active_procs=-1; 6549 PetscInt nis,nisdofs,nisneu,nisvert; 6550 PC pc_temp; 6551 PCType coarse_pc_type; 6552 KSPType coarse_ksp_type; 6553 PetscBool multilevel_requested,multilevel_allowed; 6554 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6555 Mat t_coarse_mat_is; 6556 PetscInt ncoarse; 6557 PetscBool compute_vecs = PETSC_FALSE; 6558 PetscScalar *array; 6559 MatReuse coarse_mat_reuse; 6560 PetscBool restr, full_restr, have_void; 6561 PetscErrorCode ierr; 6562 6563 PetscFunctionBegin; 6564 /* Assign global numbering to coarse dofs */ 6565 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6566 PetscInt ocoarse_size; 6567 compute_vecs = PETSC_TRUE; 6568 ocoarse_size = pcbddc->coarse_size; 6569 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6570 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6571 /* see if we can avoid some work */ 6572 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6573 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6574 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6575 PC pc; 6576 PetscBool isbddc; 6577 6578 /* temporary workaround since PCBDDC does not have a reset method so far */ 6579 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6580 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6581 if (isbddc) { 6582 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6583 } else { 6584 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6585 } 6586 coarse_reuse = PETSC_FALSE; 6587 } else { /* we can safely reuse already computed coarse matrix */ 6588 coarse_reuse = PETSC_TRUE; 6589 } 6590 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6591 coarse_reuse = PETSC_FALSE; 6592 } 6593 /* reset any subassembling information */ 6594 if (!coarse_reuse || pcbddc->recompute_topography) { 6595 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6596 } 6597 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6598 coarse_reuse = PETSC_TRUE; 6599 } 6600 /* assemble coarse matrix */ 6601 if (coarse_reuse && pcbddc->coarse_ksp) { 6602 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6603 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6604 coarse_mat_reuse = MAT_REUSE_MATRIX; 6605 } else { 6606 coarse_mat = NULL; 6607 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6608 } 6609 6610 /* creates temporary l2gmap and IS for coarse indexes */ 6611 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6612 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6613 6614 /* creates temporary MATIS object for coarse matrix */ 6615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6616 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6617 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6618 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6619 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6620 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6621 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6622 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6623 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6624 6625 /* count "active" (i.e. with positive local size) and "void" processes */ 6626 im_active = !!(pcis->n); 6627 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6628 6629 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6630 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6631 /* full_restr : just use the receivers from the subassembling pattern */ 6632 coarse_mat_is = NULL; 6633 multilevel_allowed = PETSC_FALSE; 6634 multilevel_requested = PETSC_FALSE; 6635 full_restr = PETSC_TRUE; 6636 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6637 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6638 if (multilevel_requested) { 6639 ncoarse = active_procs/pcbddc->coarsening_ratio; 6640 restr = PETSC_FALSE; 6641 full_restr = PETSC_FALSE; 6642 } else { 6643 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6644 restr = PETSC_TRUE; 6645 full_restr = PETSC_TRUE; 6646 } 6647 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6648 ncoarse = PetscMax(1,ncoarse); 6649 if (!pcbddc->coarse_subassembling) { 6650 if (pcbddc->coarsening_ratio > 1) { 6651 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6652 } else { 6653 PetscMPIInt size,rank; 6654 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6655 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6656 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6657 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6658 } 6659 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6660 PetscInt psum; 6661 PetscMPIInt size; 6662 if (pcbddc->coarse_ksp) psum = 1; 6663 else psum = 0; 6664 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6665 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6666 if (ncoarse < size) have_void = PETSC_TRUE; 6667 } 6668 /* determine if we can go multilevel */ 6669 if (multilevel_requested) { 6670 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6671 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6672 } 6673 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6674 6675 /* dump subassembling pattern */ 6676 if (pcbddc->dbg_flag && multilevel_allowed) { 6677 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6678 } 6679 6680 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6681 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6682 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6683 const PetscInt *idxs; 6684 ISLocalToGlobalMapping tmap; 6685 6686 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6687 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6688 /* allocate space for temporary storage */ 6689 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6690 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6691 /* allocate for IS array */ 6692 nisdofs = pcbddc->n_ISForDofsLocal; 6693 nisneu = !!pcbddc->NeumannBoundariesLocal; 6694 nisvert = 0; /* nisvert is not used */ 6695 nis = nisdofs + nisneu + nisvert; 6696 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6697 /* dofs splitting */ 6698 for (i=0;i<nisdofs;i++) { 6699 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6700 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6701 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6702 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6703 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6704 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6705 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6706 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6707 } 6708 /* neumann boundaries */ 6709 if (pcbddc->NeumannBoundariesLocal) { 6710 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6711 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6712 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6713 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6714 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6715 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6716 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6717 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6718 } 6719 /* free memory */ 6720 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6721 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6722 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6723 } else { 6724 nis = 0; 6725 nisdofs = 0; 6726 nisneu = 0; 6727 nisvert = 0; 6728 isarray = NULL; 6729 } 6730 /* destroy no longer needed map */ 6731 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6732 6733 /* subassemble */ 6734 if (multilevel_allowed) { 6735 Vec vp[1]; 6736 PetscInt nvecs = 0; 6737 PetscBool reuse,reuser; 6738 6739 if (coarse_mat) reuse = PETSC_TRUE; 6740 else reuse = PETSC_FALSE; 6741 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6742 vp[0] = NULL; 6743 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6744 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6745 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6746 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6747 nvecs = 1; 6748 6749 if (pcbddc->divudotp) { 6750 Mat B,loc_divudotp; 6751 Vec v,p; 6752 IS dummy; 6753 PetscInt np; 6754 6755 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6756 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6757 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6758 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6759 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6760 ierr = VecSet(p,1.);CHKERRQ(ierr); 6761 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6762 ierr = VecDestroy(&p);CHKERRQ(ierr); 6763 ierr = MatDestroy(&B);CHKERRQ(ierr); 6764 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6765 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6766 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6767 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6768 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6769 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6770 ierr = VecDestroy(&v);CHKERRQ(ierr); 6771 } 6772 } 6773 if (reuser) { 6774 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6775 } else { 6776 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6777 } 6778 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6779 PetscScalar *arraym,*arrayv; 6780 PetscInt nl; 6781 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6782 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6783 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6784 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 6785 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 6786 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 6787 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6788 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 6789 } else { 6790 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 6791 } 6792 } else { 6793 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 6794 } 6795 if (coarse_mat_is || coarse_mat) { 6796 PetscMPIInt size; 6797 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 6798 if (!multilevel_allowed) { 6799 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 6800 } else { 6801 Mat A; 6802 6803 /* if this matrix is present, it means we are not reusing the coarse matrix */ 6804 if (coarse_mat_is) { 6805 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 6806 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 6807 coarse_mat = coarse_mat_is; 6808 } 6809 /* be sure we don't have MatSeqDENSE as local mat */ 6810 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 6811 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 6812 } 6813 } 6814 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 6815 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 6816 6817 /* create local to global scatters for coarse problem */ 6818 if (compute_vecs) { 6819 PetscInt lrows; 6820 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 6821 if (coarse_mat) { 6822 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 6823 } else { 6824 lrows = 0; 6825 } 6826 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 6827 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 6828 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 6829 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6830 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6831 } 6832 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 6833 6834 /* set defaults for coarse KSP and PC */ 6835 if (multilevel_allowed) { 6836 coarse_ksp_type = KSPRICHARDSON; 6837 coarse_pc_type = PCBDDC; 6838 } else { 6839 coarse_ksp_type = KSPPREONLY; 6840 coarse_pc_type = PCREDUNDANT; 6841 } 6842 6843 /* print some info if requested */ 6844 if (pcbddc->dbg_flag) { 6845 if (!multilevel_allowed) { 6846 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6847 if (multilevel_requested) { 6848 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6849 } else if (pcbddc->max_levels) { 6850 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6851 } 6852 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6853 } 6854 } 6855 6856 /* create the coarse KSP object only once with defaults */ 6857 if (coarse_mat) { 6858 PetscViewer dbg_viewer = NULL; 6859 if (pcbddc->dbg_flag) { 6860 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6861 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6862 } 6863 if (!pcbddc->coarse_ksp) { 6864 char prefix[256],str_level[16]; 6865 size_t len; 6866 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6867 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6868 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6869 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6870 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6871 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6872 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6873 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6874 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6875 /* prefix */ 6876 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6877 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6878 if (!pcbddc->current_level) { 6879 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6880 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6881 } else { 6882 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6883 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6884 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6885 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6886 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6887 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6888 } 6889 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6890 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6891 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6892 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6893 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6894 /* allow user customization */ 6895 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6896 } 6897 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6898 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6899 if (nisdofs) { 6900 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6901 for (i=0;i<nisdofs;i++) { 6902 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6903 } 6904 } 6905 if (nisneu) { 6906 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6907 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6908 } 6909 if (nisvert) { 6910 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6911 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6912 } 6913 6914 /* get some info after set from options */ 6915 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6916 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6917 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6918 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6919 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6920 isbddc = PETSC_FALSE; 6921 } 6922 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6923 if (isredundant) { 6924 KSP inner_ksp; 6925 PC inner_pc; 6926 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6927 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6928 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6929 } 6930 6931 /* parameters which miss an API */ 6932 if (isbddc) { 6933 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6934 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6935 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6936 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6937 if (pcbddc_coarse->benign_saddle_point) { 6938 Mat coarsedivudotp_is; 6939 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6940 IS row,col; 6941 const PetscInt *gidxs; 6942 PetscInt n,st,M,N; 6943 6944 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6945 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6946 st = st-n; 6947 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6948 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6949 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6950 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6951 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6952 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6953 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6954 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6955 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6956 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6957 ierr = ISDestroy(&row);CHKERRQ(ierr); 6958 ierr = ISDestroy(&col);CHKERRQ(ierr); 6959 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6960 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6961 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6962 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6963 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6964 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6965 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6966 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6967 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6968 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6969 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6970 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6971 } 6972 } 6973 6974 /* propagate symmetry info of coarse matrix */ 6975 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6976 if (pc->pmat->symmetric_set) { 6977 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6978 } 6979 if (pc->pmat->hermitian_set) { 6980 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6981 } 6982 if (pc->pmat->spd_set) { 6983 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6984 } 6985 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6986 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6987 } 6988 /* set operators */ 6989 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6990 if (pcbddc->dbg_flag) { 6991 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6992 } 6993 } 6994 ierr = PetscFree(isarray);CHKERRQ(ierr); 6995 #if 0 6996 { 6997 PetscViewer viewer; 6998 char filename[256]; 6999 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7000 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7001 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7002 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7003 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7004 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7005 } 7006 #endif 7007 7008 if (pcbddc->coarse_ksp) { 7009 Vec crhs,csol; 7010 7011 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7012 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7013 if (!csol) { 7014 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7015 } 7016 if (!crhs) { 7017 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7018 } 7019 } 7020 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7021 7022 /* compute null space for coarse solver if the benign trick has been requested */ 7023 if (pcbddc->benign_null) { 7024 7025 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7026 for (i=0;i<pcbddc->benign_n;i++) { 7027 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7028 } 7029 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7030 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7031 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7032 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7033 if (coarse_mat) { 7034 Vec nullv; 7035 PetscScalar *array,*array2; 7036 PetscInt nl; 7037 7038 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7039 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7040 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7041 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7042 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7043 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7044 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7045 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7046 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7047 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7048 } 7049 } 7050 7051 if (pcbddc->coarse_ksp) { 7052 PetscBool ispreonly; 7053 7054 if (CoarseNullSpace) { 7055 PetscBool isnull; 7056 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7057 if (isnull) { 7058 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7059 } 7060 /* TODO: add local nullspaces (if any) */ 7061 } 7062 /* setup coarse ksp */ 7063 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7064 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7065 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7066 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7067 KSP check_ksp; 7068 KSPType check_ksp_type; 7069 PC check_pc; 7070 Vec check_vec,coarse_vec; 7071 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7072 PetscInt its; 7073 PetscBool compute_eigs; 7074 PetscReal *eigs_r,*eigs_c; 7075 PetscInt neigs; 7076 const char *prefix; 7077 7078 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7079 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7080 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7081 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7082 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7083 /* prevent from setup unneeded object */ 7084 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7085 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7086 if (ispreonly) { 7087 check_ksp_type = KSPPREONLY; 7088 compute_eigs = PETSC_FALSE; 7089 } else { 7090 check_ksp_type = KSPGMRES; 7091 compute_eigs = PETSC_TRUE; 7092 } 7093 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7094 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7095 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7096 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7097 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7098 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7099 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7100 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7101 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7102 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7103 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7104 /* create random vec */ 7105 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7106 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7107 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7108 /* solve coarse problem */ 7109 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7110 /* set eigenvalue estimation if preonly has not been requested */ 7111 if (compute_eigs) { 7112 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7113 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7114 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7115 if (neigs) { 7116 lambda_max = eigs_r[neigs-1]; 7117 lambda_min = eigs_r[0]; 7118 if (pcbddc->use_coarse_estimates) { 7119 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7120 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7121 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7122 } 7123 } 7124 } 7125 } 7126 7127 /* check coarse problem residual error */ 7128 if (pcbddc->dbg_flag) { 7129 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7130 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7131 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7132 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7133 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7134 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7135 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7136 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7137 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7138 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7139 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7140 if (CoarseNullSpace) { 7141 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7142 } 7143 if (compute_eigs) { 7144 PetscReal lambda_max_s,lambda_min_s; 7145 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7146 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7147 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7148 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7149 for (i=0;i<neigs;i++) { 7150 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7151 } 7152 } 7153 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7154 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7155 } 7156 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7157 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7158 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7159 if (compute_eigs) { 7160 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7161 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7162 } 7163 } 7164 } 7165 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7166 /* print additional info */ 7167 if (pcbddc->dbg_flag) { 7168 /* waits until all processes reaches this point */ 7169 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7170 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7171 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7172 } 7173 7174 /* free memory */ 7175 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7176 PetscFunctionReturn(0); 7177 } 7178 7179 #undef __FUNCT__ 7180 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7181 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7182 { 7183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7184 PC_IS* pcis = (PC_IS*)pc->data; 7185 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7186 IS subset,subset_mult,subset_n; 7187 PetscInt local_size,coarse_size=0; 7188 PetscInt *local_primal_indices=NULL; 7189 const PetscInt *t_local_primal_indices; 7190 PetscErrorCode ierr; 7191 7192 PetscFunctionBegin; 7193 /* Compute global number of coarse dofs */ 7194 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7195 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7196 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7197 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7198 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7199 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7200 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7201 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7202 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7203 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7204 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7205 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7206 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7207 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7208 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7209 7210 /* check numbering */ 7211 if (pcbddc->dbg_flag) { 7212 PetscScalar coarsesum,*array,*array2; 7213 PetscInt i; 7214 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7215 7216 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7217 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7219 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7220 /* counter */ 7221 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7222 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7223 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7224 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7225 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7226 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7227 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7228 for (i=0;i<pcbddc->local_primal_size;i++) { 7229 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7230 } 7231 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7232 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7233 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7234 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7235 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7236 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7237 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7238 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7239 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7240 for (i=0;i<pcis->n;i++) { 7241 if (array[i] != 0.0 && array[i] != array2[i]) { 7242 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7243 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7244 set_error = PETSC_TRUE; 7245 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7246 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7247 } 7248 } 7249 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7250 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7251 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7252 for (i=0;i<pcis->n;i++) { 7253 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7254 } 7255 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7256 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7257 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7258 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7259 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7261 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7262 PetscInt *gidxs; 7263 7264 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7265 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7269 for (i=0;i<pcbddc->local_primal_size;i++) { 7270 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7271 } 7272 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7273 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7274 } 7275 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7276 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7277 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7278 } 7279 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7280 /* get back data */ 7281 *coarse_size_n = coarse_size; 7282 *local_primal_indices_n = local_primal_indices; 7283 PetscFunctionReturn(0); 7284 } 7285 7286 #undef __FUNCT__ 7287 #define __FUNCT__ "PCBDDCGlobalToLocal" 7288 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7289 { 7290 IS localis_t; 7291 PetscInt i,lsize,*idxs,n; 7292 PetscScalar *vals; 7293 PetscErrorCode ierr; 7294 7295 PetscFunctionBegin; 7296 /* get indices in local ordering exploiting local to global map */ 7297 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7298 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7299 for (i=0;i<lsize;i++) vals[i] = 1.0; 7300 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7301 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7302 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7303 if (idxs) { /* multilevel guard */ 7304 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7305 } 7306 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7307 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7308 ierr = PetscFree(vals);CHKERRQ(ierr); 7309 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7310 /* now compute set in local ordering */ 7311 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7312 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7313 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7314 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7315 for (i=0,lsize=0;i<n;i++) { 7316 if (PetscRealPart(vals[i]) > 0.5) { 7317 lsize++; 7318 } 7319 } 7320 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7321 for (i=0,lsize=0;i<n;i++) { 7322 if (PetscRealPart(vals[i]) > 0.5) { 7323 idxs[lsize++] = i; 7324 } 7325 } 7326 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7327 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7328 *localis = localis_t; 7329 PetscFunctionReturn(0); 7330 } 7331 7332 #undef __FUNCT__ 7333 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7334 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7335 { 7336 PC_IS *pcis=(PC_IS*)pc->data; 7337 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7338 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7339 Mat S_j; 7340 PetscInt *used_xadj,*used_adjncy; 7341 PetscBool free_used_adj; 7342 PetscErrorCode ierr; 7343 7344 PetscFunctionBegin; 7345 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7346 free_used_adj = PETSC_FALSE; 7347 if (pcbddc->sub_schurs_layers == -1) { 7348 used_xadj = NULL; 7349 used_adjncy = NULL; 7350 } else { 7351 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7352 used_xadj = pcbddc->mat_graph->xadj; 7353 used_adjncy = pcbddc->mat_graph->adjncy; 7354 } else if (pcbddc->computed_rowadj) { 7355 used_xadj = pcbddc->mat_graph->xadj; 7356 used_adjncy = pcbddc->mat_graph->adjncy; 7357 } else { 7358 PetscBool flg_row=PETSC_FALSE; 7359 const PetscInt *xadj,*adjncy; 7360 PetscInt nvtxs; 7361 7362 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7363 if (flg_row) { 7364 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7365 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7366 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7367 free_used_adj = PETSC_TRUE; 7368 } else { 7369 pcbddc->sub_schurs_layers = -1; 7370 used_xadj = NULL; 7371 used_adjncy = NULL; 7372 } 7373 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7374 } 7375 } 7376 7377 /* setup sub_schurs data */ 7378 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7379 if (!sub_schurs->schur_explicit) { 7380 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7381 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7382 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7383 } else { 7384 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7385 PetscBool isseqaij,need_change = PETSC_FALSE; 7386 PetscInt benign_n; 7387 Mat change = NULL; 7388 Vec scaling = NULL; 7389 IS change_primal = NULL; 7390 7391 if (!pcbddc->use_vertices && reuse_solvers) { 7392 PetscInt n_vertices; 7393 7394 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7395 reuse_solvers = (PetscBool)!n_vertices; 7396 } 7397 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7398 if (!isseqaij) { 7399 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7400 if (matis->A == pcbddc->local_mat) { 7401 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7402 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7403 } else { 7404 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7405 } 7406 } 7407 if (!pcbddc->benign_change_explicit) { 7408 benign_n = pcbddc->benign_n; 7409 } else { 7410 benign_n = 0; 7411 } 7412 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7413 We need a global reduction to avoid possible deadlocks. 7414 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7415 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7416 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7417 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7418 need_change = (PetscBool)(!need_change); 7419 } 7420 /* If the user defines additional constraints, we import them here. 7421 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7422 if (need_change) { 7423 PC_IS *pcisf; 7424 PC_BDDC *pcbddcf; 7425 PC pcf; 7426 7427 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7428 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7429 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7430 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7431 /* hacks */ 7432 pcisf = (PC_IS*)pcf->data; 7433 pcisf->is_B_local = pcis->is_B_local; 7434 pcisf->vec1_N = pcis->vec1_N; 7435 pcisf->BtoNmap = pcis->BtoNmap; 7436 pcisf->n = pcis->n; 7437 pcisf->n_B = pcis->n_B; 7438 pcbddcf = (PC_BDDC*)pcf->data; 7439 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7440 pcbddcf->mat_graph = pcbddc->mat_graph; 7441 pcbddcf->use_faces = PETSC_TRUE; 7442 pcbddcf->use_change_of_basis = PETSC_TRUE; 7443 pcbddcf->use_change_on_faces = PETSC_TRUE; 7444 pcbddcf->use_qr_single = PETSC_TRUE; 7445 pcbddcf->fake_change = PETSC_TRUE; 7446 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7447 /* store information on primal vertices and change of basis (in local numbering) */ 7448 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7449 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7450 change = pcbddcf->ConstraintMatrix; 7451 pcbddcf->ConstraintMatrix = NULL; 7452 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7453 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7454 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7455 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7456 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7457 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7458 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7459 pcf->ops->destroy = NULL; 7460 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7461 } 7462 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7463 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7464 ierr = MatDestroy(&change);CHKERRQ(ierr); 7465 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7466 } 7467 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7468 7469 /* free adjacency */ 7470 if (free_used_adj) { 7471 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7472 } 7473 PetscFunctionReturn(0); 7474 } 7475 7476 #undef __FUNCT__ 7477 #define __FUNCT__ "PCBDDCInitSubSchurs" 7478 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7479 { 7480 PC_IS *pcis=(PC_IS*)pc->data; 7481 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7482 PCBDDCGraph graph; 7483 PetscErrorCode ierr; 7484 7485 PetscFunctionBegin; 7486 /* attach interface graph for determining subsets */ 7487 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7488 IS verticesIS,verticescomm; 7489 PetscInt vsize,*idxs; 7490 7491 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7492 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7493 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7494 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7495 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7496 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7497 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7498 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7499 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7500 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7501 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7502 } else { 7503 graph = pcbddc->mat_graph; 7504 } 7505 /* print some info */ 7506 if (pcbddc->dbg_flag) { 7507 IS vertices; 7508 PetscInt nv,nedges,nfaces; 7509 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7510 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7511 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7512 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7513 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7514 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7515 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7516 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7517 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7518 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7519 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7520 } 7521 7522 /* sub_schurs init */ 7523 if (!pcbddc->sub_schurs) { 7524 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7525 } 7526 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7527 7528 /* free graph struct */ 7529 if (pcbddc->sub_schurs_rebuild) { 7530 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7531 } 7532 PetscFunctionReturn(0); 7533 } 7534 7535 #undef __FUNCT__ 7536 #define __FUNCT__ "PCBDDCCheckOperator" 7537 PetscErrorCode PCBDDCCheckOperator(PC pc) 7538 { 7539 PC_IS *pcis=(PC_IS*)pc->data; 7540 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7541 PetscErrorCode ierr; 7542 7543 PetscFunctionBegin; 7544 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7545 IS zerodiag = NULL; 7546 Mat S_j,B0_B=NULL; 7547 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7548 PetscScalar *p0_check,*array,*array2; 7549 PetscReal norm; 7550 PetscInt i; 7551 7552 /* B0 and B0_B */ 7553 if (zerodiag) { 7554 IS dummy; 7555 7556 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7557 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7558 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7559 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7560 } 7561 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7562 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7563 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7564 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7565 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7566 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7567 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7568 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7569 /* S_j */ 7570 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7571 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7572 7573 /* mimic vector in \widetilde{W}_\Gamma */ 7574 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7575 /* continuous in primal space */ 7576 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7577 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7578 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7579 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7580 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7581 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7582 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7583 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7584 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7585 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7586 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7587 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7588 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7589 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7590 7591 /* assemble rhs for coarse problem */ 7592 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7593 /* local with Schur */ 7594 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7595 if (zerodiag) { 7596 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7597 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7598 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7599 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7600 } 7601 /* sum on primal nodes the local contributions */ 7602 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7603 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7604 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7605 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7606 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7607 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7608 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7609 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7610 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7611 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7612 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7613 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7614 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7615 /* scale primal nodes (BDDC sums contibutions) */ 7616 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7617 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7618 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7619 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7620 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7621 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7622 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7623 /* global: \widetilde{B0}_B w_\Gamma */ 7624 if (zerodiag) { 7625 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7626 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7627 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7628 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7629 } 7630 /* BDDC */ 7631 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7632 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7633 7634 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7635 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7636 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7637 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7638 for (i=0;i<pcbddc->benign_n;i++) { 7639 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7640 } 7641 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7642 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7643 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7644 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7645 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7646 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7647 } 7648 PetscFunctionReturn(0); 7649 } 7650