1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 if (!esize) PetscFunctionReturn(0); 84 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 85 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 86 87 /* gradients */ 88 ptr = work + 5*esize; 89 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 90 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 91 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 92 ierr = MatDestroy(&GE);CHKERRQ(ierr); 93 94 /* constants */ 95 ptr += rsize*csize; 96 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 97 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 99 ierr = MatDestroy(&GE);CHKERRQ(ierr); 100 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 101 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PCBDDCNedelecSupport" 107 PetscErrorCode PCBDDCNedelecSupport(PC pc) 108 { 109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 110 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 111 Mat G,T,conn,lG,lGt,lGis,lGall,lGe; 112 MatNullSpace nnsp; 113 Vec tvec,*quads; 114 PetscSF sfv; 115 ISLocalToGlobalMapping el2g,vl2g,fl2g; 116 MPI_Comm comm; 117 IS lned,primals,allprimals,nedfieldlocal; 118 IS *eedges,*extrows,*extcols,*alleedges; 119 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee; 120 PetscScalar *vals,*work; 121 PetscReal *rwork; 122 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 123 PetscInt ne,nv,Lv,order,n,field; 124 PetscInt n_neigh,*neigh,*n_shared,**shared; 125 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscBool print,eerr,done,lrc[2],conforming; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* test variable order code and print debug info TODO: to be removed */ 133 print = PETSC_FALSE; 134 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 135 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 136 137 /* Return to caller if there are no edges in the decomposition */ 138 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 139 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 140 ierr = ISLocalToGlobalMappingGetSize(el2g,&n);CHKERRQ(ierr); 141 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 142 lrc[0] = PETSC_FALSE; 143 for (i=0;i<n;i++) { 144 if (PetscRealPart(vals[i]) > 2.) { 145 lrc[0] = PETSC_TRUE; 146 break; 147 } 148 } 149 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 150 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 151 if (!lrc[1]) PetscFunctionReturn(0); 152 153 /* Get discrete gradient 154 If it is defined for a subset of dofs, assumes G is given in global ordering for all the dofs */ 155 G = pcbddc->discretegradient; 156 order = pcbddc->nedorder; 157 conforming = pcbddc->conforming; 158 field = pcbddc->nedfield; 159 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 160 if (pcbddc->n_ISForDofsLocal && field > -1) { 161 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 162 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 163 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 164 } else { 165 PetscBool testnedfield = PETSC_FALSE; 166 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 167 if (!testnedfield) { 168 ne = n; 169 nedfieldlocal = NULL; 170 } else { 171 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 172 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 173 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 174 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 175 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 176 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 177 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 178 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 179 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 180 for (i=0,cum=0;i<n;i++) { 181 if (matis->sf_leafdata[i] > 1) { 182 matis->sf_leafdata[cum++] = i; 183 } 184 } 185 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 186 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 187 } 188 } 189 190 if (nedfieldlocal) { /* merge with previous code when testing is done */ 191 IS is; 192 193 /* need to map from the local Nedelec field to local numbering */ 194 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 195 /* need to map from the local Nedelec field to global numbering */ 196 ierr = ISLocalToGlobalMappingApplyIS(el2g,nedfieldlocal,&is);CHKERRQ(ierr); 197 ierr = ISLocalToGlobalMappingCreateIS(is,&el2g);CHKERRQ(ierr); 198 ierr = ISDestroy(&is);CHKERRQ(ierr); 199 } else { 200 ierr = PetscObjectReference((PetscObject)el2g);CHKERRQ(ierr); 201 fl2g = NULL; 202 } 203 204 /* Sanity checks */ 205 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 206 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 207 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 208 209 /* Drop connections for interior edges (this modifies G) */ 210 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 211 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 212 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 213 if (nedfieldlocal) { 214 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 215 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 216 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 217 } else { 218 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 219 } 220 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 221 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 222 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 223 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 224 if (matis->sf_rootdata[i] < 2) { 225 matis->sf_rootdata[cum++] = i + rst; 226 } 227 } 228 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 229 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 230 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 231 232 /* Extract subdomain relevant rows of G */ 233 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 234 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 235 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 236 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 237 ierr = ISDestroy(&lned);CHKERRQ(ierr); 238 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 239 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 240 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 241 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 242 if (print) { 243 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 244 ierr = MatView(lG,NULL);CHKERRQ(ierr); 245 } 246 247 /* SF for nodal communications */ 248 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 249 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 250 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 251 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 252 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 253 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 254 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 257 258 /* Destroy temporary G created in MATIS format */ 259 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 260 261 /* Analyze the edge-nodes connections (duplicate lG) */ 262 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 263 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 264 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 265 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 266 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 267 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 268 /* need to import the boundary specification to ensure the 269 proper detection of coarse edges' endpoints */ 270 if (pcbddc->DirichletBoundariesLocal) { 271 IS is; 272 273 if (fl2g) { 274 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 275 } else { 276 is = pcbddc->DirichletBoundariesLocal; 277 } 278 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 279 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 280 for (i=0;i<cum;i++) { 281 if (idxs[i] >= 0) { 282 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 283 } 284 } 285 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 286 if (fl2g) { 287 ierr = ISDestroy(&is);CHKERRQ(ierr); 288 } 289 } 290 if (pcbddc->NeumannBoundariesLocal) { 291 IS is; 292 293 if (fl2g) { 294 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 295 } else { 296 is = pcbddc->NeumannBoundariesLocal; 297 } 298 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 299 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 300 for (i=0;i<cum;i++) { 301 if (idxs[i] >= 0) { 302 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 303 } 304 } 305 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 306 if (fl2g) { 307 ierr = ISDestroy(&is);CHKERRQ(ierr); 308 } 309 } 310 311 /* need to remove coarse faces' dofs to ensure the 312 proper detection of coarse edges' endpoints */ 313 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 314 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 315 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 316 for (i=1;i<n_neigh;i++) 317 for (j=0;j<n_shared[i];j++) 318 marks[shared[i][j]]++; 319 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 320 for (i=0;i<ne;i++) { 321 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 322 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 323 } 324 } 325 326 if (!conforming) { 327 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 328 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 329 } 330 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 331 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 332 cum = 0; 333 for (i=0;i<ne;i++) { 334 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 335 if (!PetscBTLookup(btee,i)) { 336 marks[cum++] = i; 337 continue; 338 } 339 /* set badly connected edge dofs as primal */ 340 if (!conforming) { 341 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 342 marks[cum++] = i; 343 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 344 for (j=ii[i];j<ii[i+1];j++) { 345 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 346 } 347 } else { 348 /* every edge dofs should be connected trough a certain number of nodal dofs 349 to other edge dofs belonging to coarse edges 350 - at most 2 endpoints 351 - order-1 interior nodal dofs 352 - no undefined nodal dofs (nconn < order) 353 */ 354 PetscInt ends = 0,ints = 0, undef = 0; 355 for (j=ii[i];j<ii[i+1];j++) { 356 PetscInt v = jj[j],k; 357 PetscInt nconn = iit[v+1]-iit[v]; 358 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 359 if (nconn > order) ends++; 360 else if (nconn == order) ints++; 361 else undef++; 362 } 363 if (undef || ends > 2 || ints != order -1) { 364 marks[cum++] = i; 365 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 366 for (j=ii[i];j<ii[i+1];j++) { 367 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 368 } 369 } 370 } 371 } 372 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 373 if (!order && ii[i+1] != ii[i]) { 374 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 375 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 376 } 377 } 378 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 379 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 380 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 381 if (!conforming) { 382 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 383 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 384 } 385 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 386 /* identify splitpoints and corner candidates: TODO variable order */ 387 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 388 if (print) { 389 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 390 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 391 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 392 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 393 } 394 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 395 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 396 for (i=0;i<nv;i++) { 397 PetscInt ord = order, test = ii[i+1]-ii[i]; 398 if (!order) { 399 PetscReal vorder = 0.; 400 401 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 402 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 403 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 404 ord = 1; 405 } 406 #if defined(PETSC_USE_DEBUG) 407 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 408 #endif 409 if (test >= 3*ord) { /* splitpoints */ 410 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 411 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 412 } else if (test == ord) { 413 if (order == 1) { 414 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 415 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 416 } else { 417 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 418 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 419 } 420 } 421 } 422 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 423 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 424 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 425 426 /* Get the local G^T explicitly */ 427 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 428 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 429 if (print) { 430 ierr = PetscObjectSetName((PetscObject)lGt,"initial_lGt");CHKERRQ(ierr); 431 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 432 } 433 434 /* Mark interior nodal dofs */ 435 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 436 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 437 for (i=1;i<n_neigh;i++) { 438 for (j=0;j<n_shared[i];j++) { 439 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 440 } 441 } 442 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 443 444 /* communicate corners and splitpoints */ 445 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 446 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 447 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 448 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 449 450 if (print) { 451 IS tbz; 452 453 cum = 0; 454 for (i=0;i<nv;i++) 455 if (sfvleaves[i]) 456 vmarks[cum++] = i; 457 458 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 459 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 460 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 461 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 462 } 463 464 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 465 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 466 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 467 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 468 469 /* Zero rows of lGt corresponding to identified corners 470 and interior nodal dofs */ 471 cum = 0; 472 for (i=0;i<nv;i++) { 473 if (sfvleaves[i]) { 474 vmarks[cum++] = i; 475 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 476 } 477 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 478 } 479 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 480 if (print) { 481 IS tbz; 482 483 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 484 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 485 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 486 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 487 } 488 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 489 ierr = PetscFree(vmarks);CHKERRQ(ierr); 490 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 491 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 492 493 /* Recompute G */ 494 ierr = MatDestroy(&lG);CHKERRQ(ierr); 495 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 496 if (print) { 497 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 498 ierr = MatView(lG,NULL);CHKERRQ(ierr); 499 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 500 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 501 } 502 503 /* Get primal dofs (if any) */ 504 cum = 0; 505 for (i=0;i<ne;i++) { 506 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 507 } 508 if (fl2g) { 509 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 510 } 511 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 512 if (print) { 513 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 514 ierr = ISView(primals,NULL);CHKERRQ(ierr); 515 } 516 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 517 /* TODO: what if the user passed in some of them ? */ 518 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 519 ierr = ISDestroy(&primals);CHKERRQ(ierr); 520 521 /* Compute edge connectivity */ 522 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 523 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 524 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 525 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 526 if (fl2g) { 527 PetscBT btf; 528 PetscInt *iia,*jja,*iiu,*jju; 529 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 530 531 /* create CSR for all local dofs */ 532 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 533 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 534 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 535 iiu = pcbddc->mat_graph->xadj; 536 jju = pcbddc->mat_graph->adjncy; 537 } else if (pcbddc->use_local_adj) { 538 rest = PETSC_TRUE; 539 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 540 } else { 541 free = PETSC_TRUE; 542 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 543 iiu[0] = 0; 544 for (i=0;i<n;i++) { 545 iiu[i+1] = i+1; 546 jju[i] = -1; 547 } 548 } 549 550 /* import sizes of CSR */ 551 iia[0] = 0; 552 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 553 554 /* overwrite entries corresponding to the Nedelec field */ 555 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 556 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 557 for (i=0;i<ne;i++) { 558 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 559 iia[idxs[i]+1] = ii[i+1]-ii[i]; 560 } 561 562 /* iia in CSR */ 563 for (i=0;i<n;i++) iia[i+1] += iia[i]; 564 565 /* jja in CSR */ 566 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 567 for (i=0;i<n;i++) 568 if (!PetscBTLookup(btf,i)) 569 for (j=0;j<iiu[i+1]-iiu[i];j++) 570 jja[iia[i]+j] = jju[iiu[i]+j]; 571 572 /* map edge dofs connectivity */ 573 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 574 for (i=0;i<ne;i++) { 575 PetscInt e = idxs[i]; 576 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 577 } 578 579 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 580 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 581 if (rest) { 582 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 583 } 584 if (free) { 585 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 586 } 587 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 588 } else { 589 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 590 } 591 } 592 593 /* Analyze interface for edge dofs */ 594 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 595 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 596 597 /* Get coarse edges in the edge space */ 598 pcbddc->mat_graph->twodim = PETSC_FALSE; 599 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 600 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 601 602 if (fl2g) { 603 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 604 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 605 for (i=0;i<nee;i++) { 606 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 607 } 608 } else { 609 eedges = alleedges; 610 primals = allprimals; 611 } 612 613 /* Mark fine edge dofs with their coarse edge id */ 614 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 615 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 616 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 617 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 618 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 619 if (print) { 620 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 621 ierr = ISView(primals,NULL);CHKERRQ(ierr); 622 } 623 624 maxsize = 0; 625 for (i=0;i<nee;i++) { 626 PetscInt size,mark = i+1; 627 628 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 629 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 630 for (j=0;j<size;j++) marks[idxs[j]] = mark; 631 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 632 maxsize = PetscMax(maxsize,size); 633 } 634 635 /* Find coarse edge endpoints */ 636 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 637 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 638 for (i=0;i<nee;i++) { 639 PetscInt mark = i+1,size; 640 641 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 642 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 643 if (print) { 644 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 645 ISView(eedges[i],NULL); 646 } 647 for (j=0;j<size;j++) { 648 PetscInt k, ee = idxs[j]; 649 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 650 for (k=ii[ee];k<ii[ee+1];k++) { 651 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 652 if (PetscBTLookup(btv,jj[k])) { 653 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 654 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 655 if (print) PetscPrintf(PETSC_COMM_SELF," candidate? %d\n",(PetscBool)(PetscBTLookup(btvcand,jj[k]))); 656 PetscInt k2; 657 PetscBool corner = PETSC_FALSE; 658 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 659 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 660 /* it's a corner if either is connected with an edge dof belonging to a different cc or 661 if the edge dof lie on the natural part of the boundary */ 662 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 663 corner = PETSC_TRUE; 664 break; 665 } 666 } 667 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 668 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 669 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 670 } else { 671 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 672 } 673 } 674 } 675 } 676 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 677 } 678 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 679 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 680 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 681 682 /* Reset marked primal dofs */ 683 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 684 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 685 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 686 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 687 688 /* Compute extended cols indices */ 689 ierr = MatGetRowIJ(lG ,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 690 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 691 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 692 i *= maxsize; 693 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 694 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 695 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 696 eerr = PETSC_FALSE; 697 for (i=0;i<nee;i++) { 698 PetscInt size; 699 700 cum = 0; 701 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 702 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 703 for (j=0;j<size;j++) { 704 PetscInt k,ee = idxs[j]; 705 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 706 } 707 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 708 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 709 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 710 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 711 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 712 /* it may happen that endpoints are not defined at this point 713 if it is the case, mark this edge for a second pass */ 714 if (cum != size -1) { 715 emarks[i] = 1; 716 if (print) { 717 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 718 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 719 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 720 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 721 } 722 eerr = PETSC_TRUE; 723 } 724 } 725 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 726 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 727 if (done) { 728 PetscInt *newprimals; 729 730 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 731 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 732 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 733 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 734 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 735 for (i=0;i<nee;i++) { 736 if (emarks[i]) { 737 PetscInt size,mark = i+1; 738 739 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 740 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 741 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 742 for (j=0;j<size;j++) { 743 PetscInt k,ee = idxs[j]; 744 for (k=ii[ee];k<ii[ee+1];k++) { 745 /* set all candidates located on the edge as corners */ 746 if (PetscBTLookup(btvcand,jj[k])) { 747 PetscInt k2,vv = jj[k]; 748 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 749 /* set all edge dofs connected to candidate as primals */ 750 for (k2=iit[vv];k2<iit[vv+1];k2++) { 751 if (marks[jjt[k2]] == mark) { 752 PetscInt k3,ee2 = jjt[k2]; 753 newprimals[cum++] = ee2; 754 /* finally set the new corners */ 755 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 756 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 757 } 758 } 759 } 760 } 761 } 762 } 763 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 764 } 765 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 766 } 767 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 768 if (fl2g) { 769 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 770 ierr = ISDestroy(&primals);CHKERRQ(ierr); 771 for (i=0;i<nee;i++) { 772 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 773 } 774 ierr = PetscFree(eedges);CHKERRQ(ierr); 775 } 776 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 777 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 778 ierr = PetscFree(newprimals);CHKERRQ(ierr); 779 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 780 ierr = ISDestroy(&primals);CHKERRQ(ierr); 781 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 782 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 783 if (fl2g) { 784 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 785 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 786 for (i=0;i<nee;i++) { 787 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 788 } 789 } else { 790 eedges = alleedges; 791 primals = allprimals; 792 } 793 794 /* Mark again */ 795 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 796 for (i=0;i<nee;i++) { 797 PetscInt size,mark = i+1; 798 799 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 800 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 801 for (j=0;j<size;j++) marks[idxs[j]] = mark; 802 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 803 } 804 if (print) { 805 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 806 ierr = ISView(primals,NULL);CHKERRQ(ierr); 807 } 808 809 /* Recompute extended cols */ 810 eerr = PETSC_FALSE; 811 for (i=0;i<nee;i++) { 812 PetscInt size; 813 814 cum = 0; 815 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 816 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 817 for (j=0;j<size;j++) { 818 PetscInt k,ee = idxs[j]; 819 for (k=ii[ee];k<ii[ee+1];k++) { 820 if (!PetscBTLookup(btv,jj[k])) { 821 extrow[cum++] = jj[k]; 822 } 823 } 824 } 825 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 826 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 827 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 828 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 829 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 830 if (cum != size -1) { 831 if (print) { 832 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 833 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 834 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 835 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 836 } 837 eerr = PETSC_TRUE; 838 } 839 } 840 } 841 ierr = MatRestoreRowIJ( lG,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 842 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 843 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 844 ierr = PetscFree(emarks);CHKERRQ(ierr); 845 /* an error should not occur at this point */ 846 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 847 848 /* Check the number of endpoints */ 849 /* TODO: fix case for circular edge */ 850 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 851 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 852 for (i=0;i<nee;i++) { 853 PetscInt size, found = 0; 854 855 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 856 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 857 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 858 for (j=0;j<size;j++) { 859 PetscInt k,ee = idxs[j]; 860 for (k=ii[ee];k<ii[ee+1];k++) { 861 PetscInt vv = jj[k]; 862 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 863 found++; 864 } 865 } 866 } 867 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 868 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 869 } 870 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 871 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 872 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 873 874 #if defined(PETSC_USE_DEBUG) 875 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 876 not interfere with neighbouring coarse edges */ 877 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 878 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 879 for (i=0;i<nv;i++) { 880 PetscInt emax = 0,eemax = 0; 881 882 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 883 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 884 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 885 for (j=1;j<nee+1;j++) { 886 if (emax < emarks[j]) { 887 emax = emarks[j]; 888 eemax = j; 889 } 890 } 891 /* not relevant for edges */ 892 if (!eemax) continue; 893 894 for (j=ii[i];j<ii[i+1];j++) { 895 if (marks[jj[j]] && marks[jj[j]] != eemax) { 896 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 897 } 898 } 899 } 900 ierr = PetscFree(emarks);CHKERRQ(ierr); 901 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 902 #endif 903 904 /* Compute extended rows indices for edge blocks of the change of basis */ 905 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 906 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 907 extmem *= maxsize; 908 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 909 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 910 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 911 for (i=0;i<nv;i++) { 912 PetscInt mark = 0,size,start; 913 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 914 for (j=ii[i];j<ii[i+1];j++) 915 if (marks[jj[j]] && !mark) 916 mark = marks[jj[j]]; 917 918 /* not relevant */ 919 if (!mark) continue; 920 921 /* import extended row */ 922 mark--; 923 start = mark*extmem+extrowcum[mark]; 924 size = ii[i+1]-ii[i]; 925 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 926 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 927 extrowcum[mark] += size; 928 } 929 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 930 cum = 0; 931 for (i=0;i<nee;i++) { 932 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 933 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 934 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 935 cum = PetscMax(cum,size); 936 } 937 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 938 ierr = PetscFree(marks);CHKERRQ(ierr); 939 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 940 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 941 942 /* Workspace for lapack inner calls and VecSetValues */ 943 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 944 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 945 for (i=0;i<maxsize;i++) vals[i] = 1.; 946 947 /* Create vectors for quadrature rules */ 948 /* TODO preserve other quadratures */ 949 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 950 for (i=0;i<nquads;i++) { 951 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 952 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 953 } 954 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 955 956 /* Create change of basis matrix (preallocation can be improved) */ 957 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 958 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 959 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 960 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 961 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 962 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 963 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 964 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 965 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 966 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 967 968 /* Defaults to identity */ 969 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 970 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 971 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 972 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 973 974 for (i=0;i<nee;i++) { 975 Mat Gins = NULL, GKins = NULL; 976 977 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 978 if (Gins && GKins) { 979 PetscScalar *data; 980 const PetscInt *rows,*cols; 981 PetscInt nrh,nch,nrc,ncc; 982 983 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 984 /* H1 */ 985 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 986 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 987 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 988 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 989 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 990 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 991 /* complement */ 992 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 993 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 994 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 995 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 996 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 997 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 998 /* Gins kernel quadratures */ 999 for (j=0;j<ncc;j++) { 1000 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1001 } 1002 /* H1 average */ 1003 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1004 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1005 } 1006 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1007 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1008 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1009 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1010 } 1011 1012 /* Start assembling */ 1013 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1014 for (i=0;i<nquads;i++) { 1015 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1016 } 1017 1018 /* Free */ 1019 if (fl2g) { 1020 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1021 for (i=0;i<nee;i++) { 1022 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1023 } 1024 ierr = PetscFree(eedges);CHKERRQ(ierr); 1025 } 1026 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1027 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1028 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1029 ierr = PetscFree(extrow);CHKERRQ(ierr); 1030 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1031 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1032 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1033 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1034 ierr = PetscFree(vals);CHKERRQ(ierr); 1035 ierr = PetscFree(extrows);CHKERRQ(ierr); 1036 ierr = PetscFree(extcols);CHKERRQ(ierr); 1037 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1038 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1039 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1040 1041 /* Complete assembling */ 1042 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1043 for (i=0;i<nquads;i++) { 1044 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1045 } 1046 for (i=0;i<nquads;i++) { 1047 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1048 } 1049 ierr = PetscFree(quads);CHKERRQ(ierr); 1050 1051 /* tell PCBDDC the topography has been analyzed */ 1052 pcbddc->recompute_topography = PETSC_FALSE; 1053 1054 /* set change of basis */ 1055 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1056 ierr = MatDestroy(&T);CHKERRQ(ierr); 1057 1058 /* set quadratures */ 1059 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1060 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1061 1062 PetscFunctionReturn(0); 1063 } 1064 1065 /* the near-null space of BDDC carries information on quadrature weights, 1066 and these can be collinear -> so cheat with MatNullSpaceCreate 1067 and create a suitable set of basis vectors first */ 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1070 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1071 { 1072 PetscErrorCode ierr; 1073 PetscInt i; 1074 1075 PetscFunctionBegin; 1076 for (i=0;i<nvecs;i++) { 1077 PetscInt first,last; 1078 1079 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1080 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1081 if (i>=first && i < last) { 1082 PetscScalar *data; 1083 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1084 if (!has_const) { 1085 data[i-first] = 1.; 1086 } else { 1087 data[2*i-first] = 1./PetscSqrtReal(2.); 1088 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1089 } 1090 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1091 } 1092 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1093 } 1094 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1095 for (i=0;i<nvecs;i++) { /* reset vectors */ 1096 PetscInt first,last; 1097 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1098 if (i>=first && i < last) { 1099 PetscScalar *data; 1100 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1101 if (!has_const) { 1102 data[i-first] = 0.; 1103 } else { 1104 data[2*i-first] = 0.; 1105 data[2*i-first+1] = 0.; 1106 } 1107 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1108 } 1109 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1116 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1117 { 1118 Mat loc_divudotp; 1119 Vec p,v,vins,quad_vec,*quad_vecs; 1120 ISLocalToGlobalMapping map; 1121 IS *faces,*edges; 1122 PetscScalar *vals; 1123 const PetscScalar *array; 1124 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1125 PetscMPIInt rank; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1130 if (graph->twodim) { 1131 lmaxneighs = 2; 1132 } else { 1133 lmaxneighs = 1; 1134 for (i=0;i<ne;i++) { 1135 const PetscInt *idxs; 1136 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1137 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1138 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1139 } 1140 lmaxneighs++; /* graph count does not include self */ 1141 } 1142 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1143 maxsize = 0; 1144 for (i=0;i<ne;i++) { 1145 PetscInt nn; 1146 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1147 maxsize = PetscMax(maxsize,nn); 1148 } 1149 for (i=0;i<nf;i++) { 1150 PetscInt nn; 1151 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1152 maxsize = PetscMax(maxsize,nn); 1153 } 1154 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1155 /* create vectors to hold quadrature weights */ 1156 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1157 if (!transpose) { 1158 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1159 } else { 1160 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1161 } 1162 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1163 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1164 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1165 for (i=0;i<maxneighs;i++) { 1166 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1167 } 1168 1169 /* compute local quad vec */ 1170 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1171 if (!transpose) { 1172 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1173 } else { 1174 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1175 } 1176 ierr = VecSet(p,1.);CHKERRQ(ierr); 1177 if (!transpose) { 1178 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1179 } else { 1180 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1181 } 1182 if (vl2l) { 1183 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1184 } else { 1185 vins = v; 1186 } 1187 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1188 ierr = VecDestroy(&p);CHKERRQ(ierr); 1189 1190 /* insert in global quadrature vecs */ 1191 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1192 for (i=0;i<nf;i++) { 1193 const PetscInt *idxs; 1194 PetscInt idx,nn,j; 1195 1196 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1197 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1198 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1199 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1200 idx = -(idx+1); 1201 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1202 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1203 } 1204 for (i=0;i<ne;i++) { 1205 const PetscInt *idxs; 1206 PetscInt idx,nn,j; 1207 1208 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1209 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1210 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1211 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1212 idx = -(idx+1); 1213 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1214 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1215 } 1216 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1217 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1218 if (vl2l) { 1219 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1220 } 1221 ierr = VecDestroy(&v);CHKERRQ(ierr); 1222 ierr = PetscFree(vals);CHKERRQ(ierr); 1223 1224 /* assemble near null space */ 1225 for (i=0;i<maxneighs;i++) { 1226 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1227 } 1228 for (i=0;i<maxneighs;i++) { 1229 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1230 } 1231 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1238 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1239 { 1240 PetscErrorCode ierr; 1241 Vec local,global; 1242 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1243 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1244 1245 PetscFunctionBegin; 1246 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1247 /* need to convert from global to local topology information and remove references to information in global ordering */ 1248 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1249 if (pcbddc->user_provided_isfordofs) { 1250 if (pcbddc->n_ISForDofs) { 1251 PetscInt i; 1252 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1253 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1254 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1255 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1256 } 1257 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1258 pcbddc->n_ISForDofs = 0; 1259 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1260 } 1261 } else { 1262 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1263 PetscInt i, n = matis->A->rmap->n; 1264 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1265 if (i > 1) { 1266 pcbddc->n_ISForDofsLocal = i; 1267 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1268 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1269 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1270 } 1271 } 1272 } 1273 } 1274 1275 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1276 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1277 } 1278 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1279 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1280 } 1281 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1282 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1283 } 1284 ierr = VecDestroy(&global);CHKERRQ(ierr); 1285 ierr = VecDestroy(&local);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1291 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1292 { 1293 PC_IS *pcis = (PC_IS*)(pc->data); 1294 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1295 PetscErrorCode ierr; 1296 1297 PetscFunctionBegin; 1298 if (!pcbddc->benign_have_null) { 1299 PetscFunctionReturn(0); 1300 } 1301 if (pcbddc->ChangeOfBasisMatrix) { 1302 Vec swap; 1303 1304 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1305 swap = pcbddc->work_change; 1306 pcbddc->work_change = r; 1307 r = swap; 1308 } 1309 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1310 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1311 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1312 ierr = VecSet(z,0.);CHKERRQ(ierr); 1313 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1314 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1315 if (pcbddc->ChangeOfBasisMatrix) { 1316 Vec swap; 1317 1318 swap = r; 1319 r = pcbddc->work_change; 1320 pcbddc->work_change = swap; 1321 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1322 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1323 } 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1329 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1330 { 1331 PCBDDCBenignMatMult_ctx ctx; 1332 PetscErrorCode ierr; 1333 PetscBool apply_right,apply_left,reset_x; 1334 1335 PetscFunctionBegin; 1336 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1337 if (transpose) { 1338 apply_right = ctx->apply_left; 1339 apply_left = ctx->apply_right; 1340 } else { 1341 apply_right = ctx->apply_right; 1342 apply_left = ctx->apply_left; 1343 } 1344 reset_x = PETSC_FALSE; 1345 if (apply_right) { 1346 const PetscScalar *ax; 1347 PetscInt nl,i; 1348 1349 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1350 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1351 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1352 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1353 for (i=0;i<ctx->benign_n;i++) { 1354 PetscScalar sum,val; 1355 const PetscInt *idxs; 1356 PetscInt nz,j; 1357 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1358 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1359 sum = 0.; 1360 if (ctx->apply_p0) { 1361 val = ctx->work[idxs[nz-1]]; 1362 for (j=0;j<nz-1;j++) { 1363 sum += ctx->work[idxs[j]]; 1364 ctx->work[idxs[j]] += val; 1365 } 1366 } else { 1367 for (j=0;j<nz-1;j++) { 1368 sum += ctx->work[idxs[j]]; 1369 } 1370 } 1371 ctx->work[idxs[nz-1]] -= sum; 1372 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1373 } 1374 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1375 reset_x = PETSC_TRUE; 1376 } 1377 if (transpose) { 1378 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1379 } else { 1380 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1381 } 1382 if (reset_x) { 1383 ierr = VecResetArray(x);CHKERRQ(ierr); 1384 } 1385 if (apply_left) { 1386 PetscScalar *ay; 1387 PetscInt i; 1388 1389 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1390 for (i=0;i<ctx->benign_n;i++) { 1391 PetscScalar sum,val; 1392 const PetscInt *idxs; 1393 PetscInt nz,j; 1394 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1395 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1396 val = -ay[idxs[nz-1]]; 1397 if (ctx->apply_p0) { 1398 sum = 0.; 1399 for (j=0;j<nz-1;j++) { 1400 sum += ay[idxs[j]]; 1401 ay[idxs[j]] += val; 1402 } 1403 ay[idxs[nz-1]] += sum; 1404 } else { 1405 for (j=0;j<nz-1;j++) { 1406 ay[idxs[j]] += val; 1407 } 1408 ay[idxs[nz-1]] = 0.; 1409 } 1410 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1411 } 1412 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1413 } 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1419 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1420 { 1421 PetscErrorCode ierr; 1422 1423 PetscFunctionBegin; 1424 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1430 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1431 { 1432 PetscErrorCode ierr; 1433 1434 PetscFunctionBegin; 1435 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "PCBDDCBenignShellMat" 1441 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1442 { 1443 PC_IS *pcis = (PC_IS*)pc->data; 1444 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1445 PCBDDCBenignMatMult_ctx ctx; 1446 PetscErrorCode ierr; 1447 1448 PetscFunctionBegin; 1449 if (!restore) { 1450 Mat A_IB,A_BI; 1451 PetscScalar *work; 1452 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1453 1454 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1455 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1456 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1457 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1458 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1459 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1460 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1461 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1462 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1463 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1464 ctx->apply_left = PETSC_TRUE; 1465 ctx->apply_right = PETSC_FALSE; 1466 ctx->apply_p0 = PETSC_FALSE; 1467 ctx->benign_n = pcbddc->benign_n; 1468 if (reuse) { 1469 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1470 ctx->free = PETSC_FALSE; 1471 } else { /* TODO: could be optimized for successive solves */ 1472 ISLocalToGlobalMapping N_to_D; 1473 PetscInt i; 1474 1475 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1476 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1477 for (i=0;i<pcbddc->benign_n;i++) { 1478 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1479 } 1480 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1481 ctx->free = PETSC_TRUE; 1482 } 1483 ctx->A = pcis->A_IB; 1484 ctx->work = work; 1485 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1486 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1487 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1488 pcis->A_IB = A_IB; 1489 1490 /* A_BI as A_IB^T */ 1491 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1492 pcbddc->benign_original_mat = pcis->A_BI; 1493 pcis->A_BI = A_BI; 1494 } else { 1495 if (!pcbddc->benign_original_mat) { 1496 PetscFunctionReturn(0); 1497 } 1498 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1499 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1500 pcis->A_IB = ctx->A; 1501 ctx->A = NULL; 1502 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1503 pcis->A_BI = pcbddc->benign_original_mat; 1504 pcbddc->benign_original_mat = NULL; 1505 if (ctx->free) { 1506 PetscInt i; 1507 for (i=0;i<ctx->benign_n;i++) { 1508 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1509 } 1510 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1511 } 1512 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1513 ierr = PetscFree(ctx);CHKERRQ(ierr); 1514 } 1515 PetscFunctionReturn(0); 1516 } 1517 1518 /* used just in bddc debug mode */ 1519 #undef __FUNCT__ 1520 #define __FUNCT__ "PCBDDCBenignProject" 1521 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1522 { 1523 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1524 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1525 Mat An; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1530 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1531 if (is1) { 1532 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1533 ierr = MatDestroy(&An);CHKERRQ(ierr); 1534 } else { 1535 *B = An; 1536 } 1537 PetscFunctionReturn(0); 1538 } 1539 1540 /* TODO: add reuse flag */ 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatSeqAIJCompress" 1543 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1544 { 1545 Mat Bt; 1546 PetscScalar *a,*bdata; 1547 const PetscInt *ii,*ij; 1548 PetscInt m,n,i,nnz,*bii,*bij; 1549 PetscBool flg_row; 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1554 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1555 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1556 nnz = n; 1557 for (i=0;i<ii[n];i++) { 1558 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1559 } 1560 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1561 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1562 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1563 nnz = 0; 1564 bii[0] = 0; 1565 for (i=0;i<n;i++) { 1566 PetscInt j; 1567 for (j=ii[i];j<ii[i+1];j++) { 1568 PetscScalar entry = a[j]; 1569 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1570 bij[nnz] = ij[j]; 1571 bdata[nnz] = entry; 1572 nnz++; 1573 } 1574 } 1575 bii[i+1] = nnz; 1576 } 1577 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1578 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1579 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1580 { 1581 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1582 b->free_a = PETSC_TRUE; 1583 b->free_ij = PETSC_TRUE; 1584 } 1585 *B = Bt; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatDetectDisconnectedComponents" 1591 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1592 { 1593 Mat B; 1594 IS is_dummy,*cc_n; 1595 ISLocalToGlobalMapping l2gmap_dummy; 1596 PCBDDCGraph graph; 1597 PetscInt i,n; 1598 PetscInt *xadj,*adjncy; 1599 PetscInt *xadj_filtered,*adjncy_filtered; 1600 PetscBool flg_row,isseqaij; 1601 PetscErrorCode ierr; 1602 1603 PetscFunctionBegin; 1604 if (!A->rmap->N || !A->cmap->N) { 1605 *ncc = 0; 1606 *cc = NULL; 1607 PetscFunctionReturn(0); 1608 } 1609 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1610 if (!isseqaij && filter) { 1611 PetscBool isseqdense; 1612 1613 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1614 if (!isseqdense) { 1615 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1616 } else { /* TODO: rectangular case and LDA */ 1617 PetscScalar *array; 1618 PetscReal chop=1.e-6; 1619 1620 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1621 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1622 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1623 for (i=0;i<n;i++) { 1624 PetscInt j; 1625 for (j=i+1;j<n;j++) { 1626 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1627 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1628 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1629 } 1630 } 1631 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1632 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1633 } 1634 } else { 1635 B = A; 1636 } 1637 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1638 1639 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1640 if (filter) { 1641 PetscScalar *data; 1642 PetscInt j,cum; 1643 1644 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1645 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1646 cum = 0; 1647 for (i=0;i<n;i++) { 1648 PetscInt t; 1649 1650 for (j=xadj[i];j<xadj[i+1];j++) { 1651 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1652 continue; 1653 } 1654 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1655 } 1656 t = xadj_filtered[i]; 1657 xadj_filtered[i] = cum; 1658 cum += t; 1659 } 1660 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1661 } else { 1662 xadj_filtered = NULL; 1663 adjncy_filtered = NULL; 1664 } 1665 1666 /* compute local connected components using PCBDDCGraph */ 1667 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1668 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1669 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1670 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1671 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1672 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1673 if (xadj_filtered) { 1674 graph->xadj = xadj_filtered; 1675 graph->adjncy = adjncy_filtered; 1676 } else { 1677 graph->xadj = xadj; 1678 graph->adjncy = adjncy; 1679 } 1680 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1681 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1682 /* partial clean up */ 1683 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1684 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1685 if (A != B) { 1686 ierr = MatDestroy(&B);CHKERRQ(ierr); 1687 } 1688 1689 /* get back data */ 1690 if (ncc) *ncc = graph->ncc; 1691 if (cc) { 1692 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1693 for (i=0;i<graph->ncc;i++) { 1694 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1695 } 1696 *cc = cc_n; 1697 } 1698 /* clean up graph */ 1699 graph->xadj = 0; 1700 graph->adjncy = 0; 1701 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "PCBDDCBenignCheck" 1707 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1708 { 1709 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1710 PC_IS* pcis = (PC_IS*)(pc->data); 1711 IS dirIS = NULL; 1712 PetscInt i; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1717 if (zerodiag) { 1718 Mat A; 1719 Vec vec3_N; 1720 PetscScalar *vals; 1721 const PetscInt *idxs; 1722 PetscInt nz,*count; 1723 1724 /* p0 */ 1725 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1726 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1727 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1728 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1729 for (i=0;i<nz;i++) vals[i] = 1.; 1730 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1731 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1732 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1733 /* v_I */ 1734 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1735 for (i=0;i<nz;i++) vals[i] = 0.; 1736 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1737 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1738 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1739 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1740 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1741 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1742 if (dirIS) { 1743 PetscInt n; 1744 1745 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1746 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1747 for (i=0;i<n;i++) vals[i] = 0.; 1748 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1749 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1750 } 1751 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1752 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1753 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1754 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1755 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1756 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1757 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1758 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1759 ierr = PetscFree(vals);CHKERRQ(ierr); 1760 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1761 1762 /* there should not be any pressure dofs lying on the interface */ 1763 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1764 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1765 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1766 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1767 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1768 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1769 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1770 ierr = PetscFree(count);CHKERRQ(ierr); 1771 } 1772 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1773 1774 /* check PCBDDCBenignGetOrSetP0 */ 1775 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1776 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1777 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1778 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1779 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1780 for (i=0;i<pcbddc->benign_n;i++) { 1781 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1782 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1783 } 1784 PetscFunctionReturn(0); 1785 } 1786 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1789 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1790 { 1791 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1792 IS pressures,zerodiag,*zerodiag_subs; 1793 PetscInt nz,n; 1794 PetscInt *interior_dofs,n_interior_dofs; 1795 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1796 PetscErrorCode ierr; 1797 1798 PetscFunctionBegin; 1799 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1800 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1801 for (n=0;n<pcbddc->benign_n;n++) { 1802 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1803 } 1804 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1805 pcbddc->benign_n = 0; 1806 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1807 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1808 Checks if all the pressure dofs in each subdomain have a zero diagonal 1809 If not, a change of basis on pressures is not needed 1810 since the local Schur complements are already SPD 1811 */ 1812 has_null_pressures = PETSC_TRUE; 1813 have_null = PETSC_TRUE; 1814 if (pcbddc->n_ISForDofsLocal) { 1815 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1816 1817 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1818 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1819 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1820 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1821 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1822 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1823 if (!sorted) { 1824 ierr = ISSort(pressures);CHKERRQ(ierr); 1825 } 1826 } else { 1827 pressures = NULL; 1828 } 1829 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1830 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1831 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1832 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1833 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1834 if (!sorted) { 1835 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1836 } 1837 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1838 if (!nz) { 1839 if (n) have_null = PETSC_FALSE; 1840 has_null_pressures = PETSC_FALSE; 1841 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1842 } 1843 recompute_zerodiag = PETSC_FALSE; 1844 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1845 zerodiag_subs = NULL; 1846 pcbddc->benign_n = 0; 1847 n_interior_dofs = 0; 1848 interior_dofs = NULL; 1849 if (pcbddc->current_level) { /* need to compute interior nodes */ 1850 PetscInt n,i,j; 1851 PetscInt n_neigh,*neigh,*n_shared,**shared; 1852 PetscInt *iwork; 1853 1854 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1855 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1856 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1857 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1858 for (i=1;i<n_neigh;i++) 1859 for (j=0;j<n_shared[i];j++) 1860 iwork[shared[i][j]] += 1; 1861 for (i=0;i<n;i++) 1862 if (!iwork[i]) 1863 interior_dofs[n_interior_dofs++] = i; 1864 ierr = PetscFree(iwork);CHKERRQ(ierr); 1865 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1866 } 1867 if (has_null_pressures) { 1868 IS *subs; 1869 PetscInt nsubs,i,j,nl; 1870 const PetscInt *idxs; 1871 PetscScalar *array; 1872 Vec *work; 1873 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1874 1875 subs = pcbddc->local_subs; 1876 nsubs = pcbddc->n_local_subs; 1877 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1878 if (pcbddc->current_level) { 1879 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1880 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1881 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1882 /* work[0] = 1_p */ 1883 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1884 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1885 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1886 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1887 /* work[0] = 1_v */ 1888 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1889 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1890 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1891 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1892 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1893 } 1894 if (nsubs > 1) { 1895 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1896 for (i=0;i<nsubs;i++) { 1897 ISLocalToGlobalMapping l2g; 1898 IS t_zerodiag_subs; 1899 PetscInt nl; 1900 1901 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1902 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1903 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1904 if (nl) { 1905 PetscBool valid = PETSC_TRUE; 1906 1907 if (pcbddc->current_level) { 1908 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1909 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1910 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1911 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1912 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1913 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1914 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1915 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1916 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1917 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1918 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1919 for (j=0;j<n_interior_dofs;j++) { 1920 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1921 valid = PETSC_FALSE; 1922 break; 1923 } 1924 } 1925 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1926 } 1927 if (valid && pcbddc->NeumannBoundariesLocal) { 1928 IS t_bc; 1929 PetscInt nzb; 1930 1931 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1932 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1933 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1934 if (nzb) valid = PETSC_FALSE; 1935 } 1936 if (valid && pressures) { 1937 IS t_pressure_subs; 1938 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1939 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1940 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1941 } 1942 if (valid) { 1943 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1944 pcbddc->benign_n++; 1945 } else { 1946 recompute_zerodiag = PETSC_TRUE; 1947 } 1948 } 1949 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1950 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1951 } 1952 } else { /* there's just one subdomain (or zero if they have not been detected */ 1953 PetscBool valid = PETSC_TRUE; 1954 1955 if (pcbddc->NeumannBoundariesLocal) { 1956 PetscInt nzb; 1957 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1958 if (nzb) valid = PETSC_FALSE; 1959 } 1960 if (valid && pressures) { 1961 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1962 } 1963 if (valid && pcbddc->current_level) { 1964 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1965 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1966 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1967 for (j=0;j<n_interior_dofs;j++) { 1968 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1969 valid = PETSC_FALSE; 1970 break; 1971 } 1972 } 1973 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1974 } 1975 if (valid) { 1976 pcbddc->benign_n = 1; 1977 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1978 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1979 zerodiag_subs[0] = zerodiag; 1980 } 1981 } 1982 if (pcbddc->current_level) { 1983 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1984 } 1985 } 1986 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1987 1988 if (!pcbddc->benign_n) { 1989 PetscInt n; 1990 1991 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1992 recompute_zerodiag = PETSC_FALSE; 1993 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1994 if (n) { 1995 has_null_pressures = PETSC_FALSE; 1996 have_null = PETSC_FALSE; 1997 } 1998 } 1999 2000 /* final check for null pressures */ 2001 if (zerodiag && pressures) { 2002 PetscInt nz,np; 2003 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2004 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2005 if (nz != np) have_null = PETSC_FALSE; 2006 } 2007 2008 if (recompute_zerodiag) { 2009 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2010 if (pcbddc->benign_n == 1) { 2011 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2012 zerodiag = zerodiag_subs[0]; 2013 } else { 2014 PetscInt i,nzn,*new_idxs; 2015 2016 nzn = 0; 2017 for (i=0;i<pcbddc->benign_n;i++) { 2018 PetscInt ns; 2019 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2020 nzn += ns; 2021 } 2022 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2023 nzn = 0; 2024 for (i=0;i<pcbddc->benign_n;i++) { 2025 PetscInt ns,*idxs; 2026 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2027 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2028 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2029 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2030 nzn += ns; 2031 } 2032 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2033 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2034 } 2035 have_null = PETSC_FALSE; 2036 } 2037 2038 /* Prepare matrix to compute no-net-flux */ 2039 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2040 Mat A,loc_divudotp; 2041 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2042 IS row,col,isused = NULL; 2043 PetscInt M,N,n,st,n_isused; 2044 2045 if (pressures) { 2046 isused = pressures; 2047 } else { 2048 isused = zerodiag; 2049 } 2050 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2051 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2052 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2053 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2054 n_isused = 0; 2055 if (isused) { 2056 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2057 } 2058 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2059 st = st-n_isused; 2060 if (n) { 2061 const PetscInt *gidxs; 2062 2063 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2064 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2065 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2066 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2067 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2068 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2069 } else { 2070 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2071 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2072 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2073 } 2074 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2075 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2076 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2077 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2078 ierr = ISDestroy(&row);CHKERRQ(ierr); 2079 ierr = ISDestroy(&col);CHKERRQ(ierr); 2080 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2081 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2082 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2083 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2084 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2085 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2086 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2087 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2088 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2089 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 } 2091 2092 /* change of basis and p0 dofs */ 2093 if (has_null_pressures) { 2094 IS zerodiagc; 2095 const PetscInt *idxs,*idxsc; 2096 PetscInt i,s,*nnz; 2097 2098 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2099 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2100 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2101 /* local change of basis for pressures */ 2102 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2103 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2104 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2105 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2106 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2107 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2108 for (i=0;i<pcbddc->benign_n;i++) { 2109 PetscInt nzs,j; 2110 2111 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2112 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2113 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2114 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2115 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2116 } 2117 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2118 ierr = PetscFree(nnz);CHKERRQ(ierr); 2119 /* set identity on velocities */ 2120 for (i=0;i<n-nz;i++) { 2121 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2122 } 2123 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2124 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2125 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2126 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2127 /* set change on pressures */ 2128 for (s=0;s<pcbddc->benign_n;s++) { 2129 PetscScalar *array; 2130 PetscInt nzs; 2131 2132 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2133 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2134 for (i=0;i<nzs-1;i++) { 2135 PetscScalar vals[2]; 2136 PetscInt cols[2]; 2137 2138 cols[0] = idxs[i]; 2139 cols[1] = idxs[nzs-1]; 2140 vals[0] = 1.; 2141 vals[1] = 1.; 2142 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2143 } 2144 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2145 for (i=0;i<nzs-1;i++) array[i] = -1.; 2146 array[nzs-1] = 1.; 2147 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2148 /* store local idxs for p0 */ 2149 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2150 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2151 ierr = PetscFree(array);CHKERRQ(ierr); 2152 } 2153 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2154 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2155 /* project if needed */ 2156 if (pcbddc->benign_change_explicit) { 2157 Mat M; 2158 2159 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2160 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2161 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2162 ierr = MatDestroy(&M);CHKERRQ(ierr); 2163 } 2164 /* store global idxs for p0 */ 2165 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2166 } 2167 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2168 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2169 2170 /* determines if the coarse solver will be singular or not */ 2171 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2172 /* determines if the problem has subdomains with 0 pressure block */ 2173 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2174 *zerodiaglocal = zerodiag; 2175 PetscFunctionReturn(0); 2176 } 2177 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2180 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2181 { 2182 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2183 PetscScalar *array; 2184 PetscErrorCode ierr; 2185 2186 PetscFunctionBegin; 2187 if (!pcbddc->benign_sf) { 2188 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2189 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2190 } 2191 if (get) { 2192 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2193 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2194 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2195 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2196 } else { 2197 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2198 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2199 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2200 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2201 } 2202 PetscFunctionReturn(0); 2203 } 2204 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2207 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2208 { 2209 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2210 PetscErrorCode ierr; 2211 2212 PetscFunctionBegin; 2213 /* TODO: add error checking 2214 - avoid nested pop (or push) calls. 2215 - cannot push before pop. 2216 - cannot call this if pcbddc->local_mat is NULL 2217 */ 2218 if (!pcbddc->benign_n) { 2219 PetscFunctionReturn(0); 2220 } 2221 if (pop) { 2222 if (pcbddc->benign_change_explicit) { 2223 IS is_p0; 2224 MatReuse reuse; 2225 2226 /* extract B_0 */ 2227 reuse = MAT_INITIAL_MATRIX; 2228 if (pcbddc->benign_B0) { 2229 reuse = MAT_REUSE_MATRIX; 2230 } 2231 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2232 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2233 /* remove rows and cols from local problem */ 2234 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2235 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2236 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2237 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2238 } else { 2239 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2240 PetscScalar *vals; 2241 PetscInt i,n,*idxs_ins; 2242 2243 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2244 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2245 if (!pcbddc->benign_B0) { 2246 PetscInt *nnz; 2247 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2248 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2249 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2250 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2251 for (i=0;i<pcbddc->benign_n;i++) { 2252 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2253 nnz[i] = n - nnz[i]; 2254 } 2255 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2256 ierr = PetscFree(nnz);CHKERRQ(ierr); 2257 } 2258 2259 for (i=0;i<pcbddc->benign_n;i++) { 2260 PetscScalar *array; 2261 PetscInt *idxs,j,nz,cum; 2262 2263 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2264 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2265 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2266 for (j=0;j<nz;j++) vals[j] = 1.; 2267 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2268 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2269 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2270 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2271 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2272 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2273 cum = 0; 2274 for (j=0;j<n;j++) { 2275 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2276 vals[cum] = array[j]; 2277 idxs_ins[cum] = j; 2278 cum++; 2279 } 2280 } 2281 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2282 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2283 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2284 } 2285 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2286 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2287 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2288 } 2289 } else { /* push */ 2290 if (pcbddc->benign_change_explicit) { 2291 PetscInt i; 2292 2293 for (i=0;i<pcbddc->benign_n;i++) { 2294 PetscScalar *B0_vals; 2295 PetscInt *B0_cols,B0_ncol; 2296 2297 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2298 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2299 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2300 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2301 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2302 } 2303 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2304 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 } else { 2306 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2307 } 2308 } 2309 PetscFunctionReturn(0); 2310 } 2311 2312 #undef __FUNCT__ 2313 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2314 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2315 { 2316 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2317 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2318 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2319 PetscBLASInt *B_iwork,*B_ifail; 2320 PetscScalar *work,lwork; 2321 PetscScalar *St,*S,*eigv; 2322 PetscScalar *Sarray,*Starray; 2323 PetscReal *eigs,thresh; 2324 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2325 PetscBool allocated_S_St; 2326 #if defined(PETSC_USE_COMPLEX) 2327 PetscReal *rwork; 2328 #endif 2329 PetscErrorCode ierr; 2330 2331 PetscFunctionBegin; 2332 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2333 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2334 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2335 2336 if (pcbddc->dbg_flag) { 2337 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2338 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2339 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2340 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2341 } 2342 2343 if (pcbddc->dbg_flag) { 2344 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2345 } 2346 2347 /* max size of subsets */ 2348 mss = 0; 2349 for (i=0;i<sub_schurs->n_subs;i++) { 2350 PetscInt subset_size; 2351 2352 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2353 mss = PetscMax(mss,subset_size); 2354 } 2355 2356 /* min/max and threshold */ 2357 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2358 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2359 nmax = PetscMax(nmin,nmax); 2360 allocated_S_St = PETSC_FALSE; 2361 if (nmin) { 2362 allocated_S_St = PETSC_TRUE; 2363 } 2364 2365 /* allocate lapack workspace */ 2366 cum = cum2 = 0; 2367 maxneigs = 0; 2368 for (i=0;i<sub_schurs->n_subs;i++) { 2369 PetscInt n,subset_size; 2370 2371 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2372 n = PetscMin(subset_size,nmax); 2373 cum += subset_size; 2374 cum2 += subset_size*n; 2375 maxneigs = PetscMax(maxneigs,n); 2376 } 2377 if (mss) { 2378 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2379 PetscBLASInt B_itype = 1; 2380 PetscBLASInt B_N = mss; 2381 PetscReal zero = 0.0; 2382 PetscReal eps = 0.0; /* dlamch? */ 2383 2384 B_lwork = -1; 2385 S = NULL; 2386 St = NULL; 2387 eigs = NULL; 2388 eigv = NULL; 2389 B_iwork = NULL; 2390 B_ifail = NULL; 2391 #if defined(PETSC_USE_COMPLEX) 2392 rwork = NULL; 2393 #endif 2394 thresh = 1.0; 2395 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2396 #if defined(PETSC_USE_COMPLEX) 2397 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2398 #else 2399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2400 #endif 2401 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2402 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2403 } else { 2404 /* TODO */ 2405 } 2406 } else { 2407 lwork = 0; 2408 } 2409 2410 nv = 0; 2411 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2412 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2413 } 2414 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2415 if (allocated_S_St) { 2416 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2417 } 2418 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2419 #if defined(PETSC_USE_COMPLEX) 2420 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2421 #endif 2422 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2423 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2424 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2425 nv+cum,&pcbddc->adaptive_constraints_idxs, 2426 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2427 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2428 2429 maxneigs = 0; 2430 cum = cumarray = 0; 2431 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2432 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2433 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2434 const PetscInt *idxs; 2435 2436 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2437 for (cum=0;cum<nv;cum++) { 2438 pcbddc->adaptive_constraints_n[cum] = 1; 2439 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2440 pcbddc->adaptive_constraints_data[cum] = 1.0; 2441 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2442 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2443 } 2444 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2445 } 2446 2447 if (mss) { /* multilevel */ 2448 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2449 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2450 } 2451 2452 thresh = pcbddc->adaptive_threshold; 2453 for (i=0;i<sub_schurs->n_subs;i++) { 2454 const PetscInt *idxs; 2455 PetscReal upper,lower; 2456 PetscInt j,subset_size,eigs_start = 0; 2457 PetscBLASInt B_N; 2458 PetscBool same_data = PETSC_FALSE; 2459 2460 if (pcbddc->use_deluxe_scaling) { 2461 upper = PETSC_MAX_REAL; 2462 lower = thresh; 2463 } else { 2464 upper = 1./thresh; 2465 lower = 0.; 2466 } 2467 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2468 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2469 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2470 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2471 if (sub_schurs->is_hermitian) { 2472 PetscInt j,k; 2473 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2474 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2475 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2476 } 2477 for (j=0;j<subset_size;j++) { 2478 for (k=j;k<subset_size;k++) { 2479 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2480 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2481 } 2482 } 2483 } else { 2484 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2485 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2486 } 2487 } else { 2488 S = Sarray + cumarray; 2489 St = Starray + cumarray; 2490 } 2491 /* see if we can save some work */ 2492 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2493 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2494 } 2495 2496 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2497 B_neigs = 0; 2498 } else { 2499 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2500 PetscBLASInt B_itype = 1; 2501 PetscBLASInt B_IL, B_IU; 2502 PetscReal eps = -1.0; /* dlamch? */ 2503 PetscInt nmin_s; 2504 PetscBool compute_range = PETSC_FALSE; 2505 2506 if (pcbddc->dbg_flag) { 2507 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2508 } 2509 2510 compute_range = PETSC_FALSE; 2511 if (thresh > 1.+PETSC_SMALL && !same_data) { 2512 compute_range = PETSC_TRUE; 2513 } 2514 2515 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2516 if (compute_range) { 2517 2518 /* ask for eigenvalues larger than thresh */ 2519 #if defined(PETSC_USE_COMPLEX) 2520 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2521 #else 2522 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2523 #endif 2524 } else if (!same_data) { 2525 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2526 B_IL = 1; 2527 #if defined(PETSC_USE_COMPLEX) 2528 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2529 #else 2530 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2531 #endif 2532 } else { /* same_data is true, so get the adaptive function requested by the user */ 2533 PetscInt k; 2534 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2535 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2536 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2537 nmin = nmax; 2538 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2539 for (k=0;k<nmax;k++) { 2540 eigs[k] = 1./PETSC_SMALL; 2541 eigv[k*(subset_size+1)] = 1.0; 2542 } 2543 } 2544 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2545 if (B_ierr) { 2546 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2547 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2548 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2549 } 2550 2551 if (B_neigs > nmax) { 2552 if (pcbddc->dbg_flag) { 2553 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2554 } 2555 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2556 B_neigs = nmax; 2557 } 2558 2559 nmin_s = PetscMin(nmin,B_N); 2560 if (B_neigs < nmin_s) { 2561 PetscBLASInt B_neigs2; 2562 2563 if (pcbddc->use_deluxe_scaling) { 2564 B_IL = B_N - nmin_s + 1; 2565 B_IU = B_N - B_neigs; 2566 } else { 2567 B_IL = B_neigs + 1; 2568 B_IU = nmin_s; 2569 } 2570 if (pcbddc->dbg_flag) { 2571 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2572 } 2573 if (sub_schurs->is_hermitian) { 2574 PetscInt j,k; 2575 for (j=0;j<subset_size;j++) { 2576 for (k=j;k<subset_size;k++) { 2577 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2578 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2579 } 2580 } 2581 } else { 2582 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2583 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2584 } 2585 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2586 #if defined(PETSC_USE_COMPLEX) 2587 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2588 #else 2589 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2590 #endif 2591 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2592 B_neigs += B_neigs2; 2593 } 2594 if (B_ierr) { 2595 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2596 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2597 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2598 } 2599 if (pcbddc->dbg_flag) { 2600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2601 for (j=0;j<B_neigs;j++) { 2602 if (eigs[j] == 0.0) { 2603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2604 } else { 2605 if (pcbddc->use_deluxe_scaling) { 2606 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2607 } else { 2608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2609 } 2610 } 2611 } 2612 } 2613 } else { 2614 /* TODO */ 2615 } 2616 } 2617 /* change the basis back to the original one */ 2618 if (sub_schurs->change) { 2619 Mat change,phi,phit; 2620 2621 if (pcbddc->dbg_flag > 1) { 2622 PetscInt ii; 2623 for (ii=0;ii<B_neigs;ii++) { 2624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2625 for (j=0;j<B_N;j++) { 2626 #if defined(PETSC_USE_COMPLEX) 2627 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2628 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2630 #else 2631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2632 #endif 2633 } 2634 } 2635 } 2636 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2638 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2639 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2640 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2641 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2642 } 2643 maxneigs = PetscMax(B_neigs,maxneigs); 2644 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2645 if (B_neigs) { 2646 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2647 2648 if (pcbddc->dbg_flag > 1) { 2649 PetscInt ii; 2650 for (ii=0;ii<B_neigs;ii++) { 2651 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2652 for (j=0;j<B_N;j++) { 2653 #if defined(PETSC_USE_COMPLEX) 2654 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2655 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2656 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2657 #else 2658 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2659 #endif 2660 } 2661 } 2662 } 2663 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2664 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2665 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2666 cum++; 2667 } 2668 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2669 /* shift for next computation */ 2670 cumarray += subset_size*subset_size; 2671 } 2672 if (pcbddc->dbg_flag) { 2673 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2674 } 2675 2676 if (mss) { 2677 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2678 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2679 /* destroy matrices (junk) */ 2680 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2681 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2682 } 2683 if (allocated_S_St) { 2684 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2685 } 2686 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2687 #if defined(PETSC_USE_COMPLEX) 2688 ierr = PetscFree(rwork);CHKERRQ(ierr); 2689 #endif 2690 if (pcbddc->dbg_flag) { 2691 PetscInt maxneigs_r; 2692 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2693 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2694 } 2695 PetscFunctionReturn(0); 2696 } 2697 2698 #undef __FUNCT__ 2699 #define __FUNCT__ "PCBDDCSetUpSolvers" 2700 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2701 { 2702 PetscScalar *coarse_submat_vals; 2703 PetscErrorCode ierr; 2704 2705 PetscFunctionBegin; 2706 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2707 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2708 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2709 2710 /* Setup local neumann solver ksp_R */ 2711 /* PCBDDCSetUpLocalScatters should be called first! */ 2712 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2713 2714 /* 2715 Setup local correction and local part of coarse basis. 2716 Gives back the dense local part of the coarse matrix in column major ordering 2717 */ 2718 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2719 2720 /* Compute total number of coarse nodes and setup coarse solver */ 2721 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2722 2723 /* free */ 2724 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2725 PetscFunctionReturn(0); 2726 } 2727 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "PCBDDCResetCustomization" 2730 PetscErrorCode PCBDDCResetCustomization(PC pc) 2731 { 2732 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2733 PetscErrorCode ierr; 2734 2735 PetscFunctionBegin; 2736 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2737 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2738 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2739 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2740 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2741 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2742 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2743 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2744 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2745 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 #undef __FUNCT__ 2750 #define __FUNCT__ "PCBDDCResetTopography" 2751 PetscErrorCode PCBDDCResetTopography(PC pc) 2752 { 2753 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2754 PetscInt i; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2759 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2760 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2761 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2762 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2763 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2764 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2765 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2766 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2767 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2768 pcbddc->graphanalyzed = PETSC_FALSE; 2769 for (i=0;i<pcbddc->n_local_subs;i++) { 2770 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2771 } 2772 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2773 if (pcbddc->sub_schurs) { 2774 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2775 } 2776 PetscFunctionReturn(0); 2777 } 2778 2779 #undef __FUNCT__ 2780 #define __FUNCT__ "PCBDDCResetSolvers" 2781 PetscErrorCode PCBDDCResetSolvers(PC pc) 2782 { 2783 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2784 PetscErrorCode ierr; 2785 2786 PetscFunctionBegin; 2787 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2788 if (pcbddc->coarse_phi_B) { 2789 PetscScalar *array; 2790 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2791 ierr = PetscFree(array);CHKERRQ(ierr); 2792 } 2793 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2794 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2795 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2796 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2797 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2798 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2799 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2800 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2801 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2802 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2803 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2804 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2805 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2806 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2807 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2808 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2809 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2810 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2811 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2812 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2813 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2814 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2815 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2816 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2817 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2818 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2819 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2820 if (pcbddc->benign_zerodiag_subs) { 2821 PetscInt i; 2822 for (i=0;i<pcbddc->benign_n;i++) { 2823 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2824 } 2825 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2826 } 2827 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 #undef __FUNCT__ 2832 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2833 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2834 { 2835 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2836 PC_IS *pcis = (PC_IS*)pc->data; 2837 VecType impVecType; 2838 PetscInt n_constraints,n_R,old_size; 2839 PetscErrorCode ierr; 2840 2841 PetscFunctionBegin; 2842 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2843 /* get sizes */ 2844 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2845 n_R = pcis->n - pcbddc->n_vertices; 2846 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2847 /* local work vectors (try to avoid unneeded work)*/ 2848 /* R nodes */ 2849 old_size = -1; 2850 if (pcbddc->vec1_R) { 2851 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2852 } 2853 if (n_R != old_size) { 2854 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2855 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2856 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2857 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2858 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2859 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2860 } 2861 /* local primal dofs */ 2862 old_size = -1; 2863 if (pcbddc->vec1_P) { 2864 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2865 } 2866 if (pcbddc->local_primal_size != old_size) { 2867 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2868 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2869 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2870 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2871 } 2872 /* local explicit constraints */ 2873 old_size = -1; 2874 if (pcbddc->vec1_C) { 2875 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2876 } 2877 if (n_constraints && n_constraints != old_size) { 2878 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2879 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2880 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2881 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2882 } 2883 PetscFunctionReturn(0); 2884 } 2885 2886 #undef __FUNCT__ 2887 #define __FUNCT__ "PCBDDCSetUpCorrection" 2888 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2889 { 2890 PetscErrorCode ierr; 2891 /* pointers to pcis and pcbddc */ 2892 PC_IS* pcis = (PC_IS*)pc->data; 2893 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2894 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2895 /* submatrices of local problem */ 2896 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2897 /* submatrices of local coarse problem */ 2898 Mat S_VV,S_CV,S_VC,S_CC; 2899 /* working matrices */ 2900 Mat C_CR; 2901 /* additional working stuff */ 2902 PC pc_R; 2903 Mat F; 2904 Vec dummy_vec; 2905 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2906 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2907 PetscScalar *work; 2908 PetscInt *idx_V_B; 2909 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2910 PetscInt i,n_R,n_D,n_B; 2911 2912 /* some shortcuts to scalars */ 2913 PetscScalar one=1.0,m_one=-1.0; 2914 2915 PetscFunctionBegin; 2916 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2917 2918 /* Set Non-overlapping dimensions */ 2919 n_vertices = pcbddc->n_vertices; 2920 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2921 n_B = pcis->n_B; 2922 n_D = pcis->n - n_B; 2923 n_R = pcis->n - n_vertices; 2924 2925 /* vertices in boundary numbering */ 2926 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2927 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2928 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2929 2930 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2931 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2932 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2933 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2935 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2936 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2937 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2939 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2940 2941 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2942 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2943 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2944 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2945 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2946 lda_rhs = n_R; 2947 need_benign_correction = PETSC_FALSE; 2948 if (isLU || isILU || isCHOL) { 2949 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2950 } else if (sub_schurs && sub_schurs->reuse_solver) { 2951 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2952 MatFactorType type; 2953 2954 F = reuse_solver->F; 2955 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2956 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2957 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2958 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2959 } else { 2960 F = NULL; 2961 } 2962 2963 /* allocate workspace */ 2964 n = 0; 2965 if (n_constraints) { 2966 n += lda_rhs*n_constraints; 2967 } 2968 if (n_vertices) { 2969 n = PetscMax(2*lda_rhs*n_vertices,n); 2970 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2971 } 2972 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2973 2974 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2975 dummy_vec = NULL; 2976 if (need_benign_correction && lda_rhs != n_R && F) { 2977 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2978 } 2979 2980 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2981 if (n_constraints) { 2982 Mat M1,M2,M3,C_B; 2983 IS is_aux; 2984 PetscScalar *array,*array2; 2985 2986 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2987 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2988 2989 /* Extract constraints on R nodes: C_{CR} */ 2990 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2991 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2992 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2993 2994 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2995 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2996 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2997 for (i=0;i<n_constraints;i++) { 2998 const PetscScalar *row_cmat_values; 2999 const PetscInt *row_cmat_indices; 3000 PetscInt size_of_constraint,j; 3001 3002 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3003 for (j=0;j<size_of_constraint;j++) { 3004 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3005 } 3006 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3007 } 3008 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3009 if (F) { 3010 Mat B; 3011 3012 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3013 if (need_benign_correction) { 3014 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3015 3016 /* rhs is already zero on interior dofs, no need to change the rhs */ 3017 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3018 } 3019 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3020 if (need_benign_correction) { 3021 PetscScalar *marr; 3022 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3023 3024 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3025 if (lda_rhs != n_R) { 3026 for (i=0;i<n_constraints;i++) { 3027 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3028 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3029 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3030 } 3031 } else { 3032 for (i=0;i<n_constraints;i++) { 3033 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3034 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3035 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3036 } 3037 } 3038 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3039 } 3040 ierr = MatDestroy(&B);CHKERRQ(ierr); 3041 } else { 3042 PetscScalar *marr; 3043 3044 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3045 for (i=0;i<n_constraints;i++) { 3046 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3047 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3048 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3049 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3050 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3051 } 3052 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3053 } 3054 if (!pcbddc->switch_static) { 3055 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3056 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3057 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3058 for (i=0;i<n_constraints;i++) { 3059 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3060 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3061 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3062 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3063 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3064 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3065 } 3066 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3067 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3068 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3069 } else { 3070 if (lda_rhs != n_R) { 3071 IS dummy; 3072 3073 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3074 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3075 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3076 } else { 3077 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3078 pcbddc->local_auxmat2 = local_auxmat2_R; 3079 } 3080 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3081 } 3082 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3083 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3084 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3085 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3086 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3087 if (isCHOL) { 3088 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3089 } else { 3090 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3091 } 3092 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3093 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3094 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3095 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3096 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3097 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3098 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3099 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3100 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3101 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3102 } 3103 3104 /* Get submatrices from subdomain matrix */ 3105 if (n_vertices) { 3106 IS is_aux; 3107 3108 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3109 IS tis; 3110 3111 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3112 ierr = ISSort(tis);CHKERRQ(ierr); 3113 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3114 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3115 } else { 3116 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3117 } 3118 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3119 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3120 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3121 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3122 } 3123 3124 /* Matrix of coarse basis functions (local) */ 3125 if (pcbddc->coarse_phi_B) { 3126 PetscInt on_B,on_primal,on_D=n_D; 3127 if (pcbddc->coarse_phi_D) { 3128 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3129 } 3130 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3131 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3132 PetscScalar *marray; 3133 3134 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3135 ierr = PetscFree(marray);CHKERRQ(ierr); 3136 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3137 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3138 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3139 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3140 } 3141 } 3142 3143 if (!pcbddc->coarse_phi_B) { 3144 PetscScalar *marray; 3145 3146 n = n_B*pcbddc->local_primal_size; 3147 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3148 n += n_D*pcbddc->local_primal_size; 3149 } 3150 if (!pcbddc->symmetric_primal) { 3151 n *= 2; 3152 } 3153 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3154 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3155 n = n_B*pcbddc->local_primal_size; 3156 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3157 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3158 n += n_D*pcbddc->local_primal_size; 3159 } 3160 if (!pcbddc->symmetric_primal) { 3161 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3162 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3163 n = n_B*pcbddc->local_primal_size; 3164 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3165 } 3166 } else { 3167 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3168 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3169 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3170 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3171 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3172 } 3173 } 3174 } 3175 3176 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3177 p0_lidx_I = NULL; 3178 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3179 const PetscInt *idxs; 3180 3181 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3182 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3183 for (i=0;i<pcbddc->benign_n;i++) { 3184 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3185 } 3186 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3187 } 3188 3189 /* vertices */ 3190 if (n_vertices) { 3191 3192 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3193 3194 if (n_R) { 3195 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3196 PetscBLASInt B_N,B_one = 1; 3197 PetscScalar *x,*y; 3198 PetscBool isseqaij; 3199 3200 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3201 if (need_benign_correction) { 3202 ISLocalToGlobalMapping RtoN; 3203 IS is_p0; 3204 PetscInt *idxs_p0,n; 3205 3206 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3207 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3208 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3209 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3210 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3211 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3212 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3213 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3214 } 3215 3216 if (lda_rhs == n_R) { 3217 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3218 } else { 3219 PetscScalar *av,*array; 3220 const PetscInt *xadj,*adjncy; 3221 PetscInt n; 3222 PetscBool flg_row; 3223 3224 array = work+lda_rhs*n_vertices; 3225 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3226 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3227 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3228 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3229 for (i=0;i<n;i++) { 3230 PetscInt j; 3231 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3232 } 3233 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3234 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3235 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3236 } 3237 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3238 if (need_benign_correction) { 3239 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3240 PetscScalar *marr; 3241 3242 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3243 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3244 3245 | 0 0 0 | (V) 3246 L = | 0 0 -1 | (P-p0) 3247 | 0 0 -1 | (p0) 3248 3249 */ 3250 for (i=0;i<reuse_solver->benign_n;i++) { 3251 const PetscScalar *vals; 3252 const PetscInt *idxs,*idxs_zero; 3253 PetscInt n,j,nz; 3254 3255 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3256 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3257 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3258 for (j=0;j<n;j++) { 3259 PetscScalar val = vals[j]; 3260 PetscInt k,col = idxs[j]; 3261 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3262 } 3263 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3264 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3265 } 3266 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3267 } 3268 if (F) { 3269 /* need to correct the rhs */ 3270 if (need_benign_correction) { 3271 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3272 PetscScalar *marr; 3273 3274 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3275 if (lda_rhs != n_R) { 3276 for (i=0;i<n_vertices;i++) { 3277 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3278 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3279 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3280 } 3281 } else { 3282 for (i=0;i<n_vertices;i++) { 3283 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3284 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3285 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3286 } 3287 } 3288 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3289 } 3290 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3291 /* need to correct the solution */ 3292 if (need_benign_correction) { 3293 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3294 PetscScalar *marr; 3295 3296 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3297 if (lda_rhs != n_R) { 3298 for (i=0;i<n_vertices;i++) { 3299 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3300 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3301 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3302 } 3303 } else { 3304 for (i=0;i<n_vertices;i++) { 3305 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3306 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3307 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3308 } 3309 } 3310 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3311 } 3312 } else { 3313 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3314 for (i=0;i<n_vertices;i++) { 3315 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3316 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3317 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3318 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3319 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3320 } 3321 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3322 } 3323 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3324 /* S_VV and S_CV */ 3325 if (n_constraints) { 3326 Mat B; 3327 3328 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3329 for (i=0;i<n_vertices;i++) { 3330 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3331 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3332 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3333 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3334 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3335 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3336 } 3337 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3338 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3339 ierr = MatDestroy(&B);CHKERRQ(ierr); 3340 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3341 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3342 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3343 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3344 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3345 ierr = MatDestroy(&B);CHKERRQ(ierr); 3346 } 3347 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3348 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3349 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3350 } 3351 if (lda_rhs != n_R) { 3352 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3353 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3354 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3355 } 3356 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3357 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3358 if (need_benign_correction) { 3359 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3360 PetscScalar *marr,*sums; 3361 3362 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3363 ierr = MatDenseGetArray(S_VVt,&marr); 3364 for (i=0;i<reuse_solver->benign_n;i++) { 3365 const PetscScalar *vals; 3366 const PetscInt *idxs,*idxs_zero; 3367 PetscInt n,j,nz; 3368 3369 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3370 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3371 for (j=0;j<n_vertices;j++) { 3372 PetscInt k; 3373 sums[j] = 0.; 3374 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3375 } 3376 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3377 for (j=0;j<n;j++) { 3378 PetscScalar val = vals[j]; 3379 PetscInt k; 3380 for (k=0;k<n_vertices;k++) { 3381 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3382 } 3383 } 3384 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3385 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3386 } 3387 ierr = PetscFree(sums);CHKERRQ(ierr); 3388 ierr = MatDenseRestoreArray(S_VVt,&marr); 3389 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3390 } 3391 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3392 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3393 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3394 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3395 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3396 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3397 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3398 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3399 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3400 } else { 3401 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3402 } 3403 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3404 3405 /* coarse basis functions */ 3406 for (i=0;i<n_vertices;i++) { 3407 PetscScalar *y; 3408 3409 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3410 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3411 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3412 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3413 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3414 y[n_B*i+idx_V_B[i]] = 1.0; 3415 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3416 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3417 3418 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3419 PetscInt j; 3420 3421 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3422 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3423 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3424 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3425 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3426 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3427 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3428 } 3429 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3430 } 3431 /* if n_R == 0 the object is not destroyed */ 3432 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3433 } 3434 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3435 3436 if (n_constraints) { 3437 Mat B; 3438 3439 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3440 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3441 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3442 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3443 if (n_vertices) { 3444 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3445 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3446 } else { 3447 Mat S_VCt; 3448 3449 if (lda_rhs != n_R) { 3450 ierr = MatDestroy(&B);CHKERRQ(ierr); 3451 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3452 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3453 } 3454 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3455 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3456 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3457 } 3458 } 3459 ierr = MatDestroy(&B);CHKERRQ(ierr); 3460 /* coarse basis functions */ 3461 for (i=0;i<n_constraints;i++) { 3462 PetscScalar *y; 3463 3464 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3465 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3466 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3467 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3468 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3469 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3470 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3471 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3472 PetscInt j; 3473 3474 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3475 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3476 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3477 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3478 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3479 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3480 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3481 } 3482 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3483 } 3484 } 3485 if (n_constraints) { 3486 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3487 } 3488 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3489 3490 /* coarse matrix entries relative to B_0 */ 3491 if (pcbddc->benign_n) { 3492 Mat B0_B,B0_BPHI; 3493 IS is_dummy; 3494 PetscScalar *data; 3495 PetscInt j; 3496 3497 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3498 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3499 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3500 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3501 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3502 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3503 for (j=0;j<pcbddc->benign_n;j++) { 3504 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3505 for (i=0;i<pcbddc->local_primal_size;i++) { 3506 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3507 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3508 } 3509 } 3510 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3511 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3512 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3513 } 3514 3515 /* compute other basis functions for non-symmetric problems */ 3516 if (!pcbddc->symmetric_primal) { 3517 Mat B_V=NULL,B_C=NULL; 3518 PetscScalar *marray; 3519 3520 if (n_constraints) { 3521 Mat S_CCT,C_CRT; 3522 3523 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3524 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3525 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3526 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3527 if (n_vertices) { 3528 Mat S_VCT; 3529 3530 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3531 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3532 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3533 } 3534 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3535 } else { 3536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3537 } 3538 if (n_vertices && n_R) { 3539 PetscScalar *av,*marray; 3540 const PetscInt *xadj,*adjncy; 3541 PetscInt n; 3542 PetscBool flg_row; 3543 3544 /* B_V = B_V - A_VR^T */ 3545 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3546 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3547 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3548 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3549 for (i=0;i<n;i++) { 3550 PetscInt j; 3551 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3552 } 3553 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3554 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3555 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3556 } 3557 3558 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3559 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3560 for (i=0;i<n_vertices;i++) { 3561 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3562 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3563 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3564 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3565 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3566 } 3567 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3568 if (B_C) { 3569 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3570 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3571 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3572 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3573 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3574 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3575 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3576 } 3577 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3578 } 3579 /* coarse basis functions */ 3580 for (i=0;i<pcbddc->local_primal_size;i++) { 3581 PetscScalar *y; 3582 3583 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3584 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3585 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3586 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3587 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3588 if (i<n_vertices) { 3589 y[n_B*i+idx_V_B[i]] = 1.0; 3590 } 3591 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3592 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3593 3594 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3595 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3596 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3597 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3598 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3599 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3600 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3601 } 3602 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3603 } 3604 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3605 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3606 } 3607 /* free memory */ 3608 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3609 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3610 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3611 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3612 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3613 ierr = PetscFree(work);CHKERRQ(ierr); 3614 if (n_vertices) { 3615 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3616 } 3617 if (n_constraints) { 3618 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3619 } 3620 /* Checking coarse_sub_mat and coarse basis functios */ 3621 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3622 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3623 if (pcbddc->dbg_flag) { 3624 Mat coarse_sub_mat; 3625 Mat AUXMAT,TM1,TM2,TM3,TM4; 3626 Mat coarse_phi_D,coarse_phi_B; 3627 Mat coarse_psi_D,coarse_psi_B; 3628 Mat A_II,A_BB,A_IB,A_BI; 3629 Mat C_B,CPHI; 3630 IS is_dummy; 3631 Vec mones; 3632 MatType checkmattype=MATSEQAIJ; 3633 PetscReal real_value; 3634 3635 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3636 Mat A; 3637 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3638 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3639 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3640 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3641 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3642 ierr = MatDestroy(&A);CHKERRQ(ierr); 3643 } else { 3644 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3645 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3646 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3647 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3648 } 3649 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3650 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3651 if (!pcbddc->symmetric_primal) { 3652 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3653 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3654 } 3655 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3656 3657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3660 if (!pcbddc->symmetric_primal) { 3661 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3662 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3664 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3665 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3666 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3667 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3668 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3669 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3670 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3671 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3672 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3673 } else { 3674 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3675 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3676 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3677 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3678 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3679 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3680 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3681 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3682 } 3683 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3684 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3685 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3686 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3687 if (pcbddc->benign_n) { 3688 Mat B0_B,B0_BPHI; 3689 PetscScalar *data,*data2; 3690 PetscInt j; 3691 3692 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3693 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3694 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3695 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3696 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3697 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3698 for (j=0;j<pcbddc->benign_n;j++) { 3699 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3700 for (i=0;i<pcbddc->local_primal_size;i++) { 3701 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3702 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3703 } 3704 } 3705 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3706 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3707 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3708 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3709 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3710 } 3711 #if 0 3712 { 3713 PetscViewer viewer; 3714 char filename[256]; 3715 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3716 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3717 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3718 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3719 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3720 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3721 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3722 if (save_change) { 3723 Mat phi_B; 3724 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3725 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3726 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3727 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3728 } else { 3729 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3730 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3731 } 3732 if (pcbddc->coarse_phi_D) { 3733 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3734 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3735 } 3736 if (pcbddc->coarse_psi_B) { 3737 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3738 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3739 } 3740 if (pcbddc->coarse_psi_D) { 3741 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3742 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3743 } 3744 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3745 } 3746 #endif 3747 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3748 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3749 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3750 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3751 3752 /* check constraints */ 3753 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3754 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3755 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3756 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3757 } else { 3758 PetscScalar *data; 3759 Mat tmat; 3760 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3761 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3762 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3763 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3764 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3765 } 3766 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3767 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3768 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3769 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3770 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3771 if (!pcbddc->symmetric_primal) { 3772 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3773 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3774 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3775 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3776 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3777 } 3778 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3779 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3780 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3781 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3782 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3783 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3784 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3785 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3786 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3787 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3788 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3789 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3790 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3791 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3792 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3793 if (!pcbddc->symmetric_primal) { 3794 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3795 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3796 } 3797 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3798 } 3799 /* get back data */ 3800 *coarse_submat_vals_n = coarse_submat_vals; 3801 PetscFunctionReturn(0); 3802 } 3803 3804 #undef __FUNCT__ 3805 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3806 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3807 { 3808 Mat *work_mat; 3809 IS isrow_s,iscol_s; 3810 PetscBool rsorted,csorted; 3811 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3812 PetscErrorCode ierr; 3813 3814 PetscFunctionBegin; 3815 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3816 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3817 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3818 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3819 3820 if (!rsorted) { 3821 const PetscInt *idxs; 3822 PetscInt *idxs_sorted,i; 3823 3824 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3825 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3826 for (i=0;i<rsize;i++) { 3827 idxs_perm_r[i] = i; 3828 } 3829 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3830 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3831 for (i=0;i<rsize;i++) { 3832 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3833 } 3834 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3835 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3836 } else { 3837 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3838 isrow_s = isrow; 3839 } 3840 3841 if (!csorted) { 3842 if (isrow == iscol) { 3843 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3844 iscol_s = isrow_s; 3845 } else { 3846 const PetscInt *idxs; 3847 PetscInt *idxs_sorted,i; 3848 3849 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3850 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3851 for (i=0;i<csize;i++) { 3852 idxs_perm_c[i] = i; 3853 } 3854 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3855 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3856 for (i=0;i<csize;i++) { 3857 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3858 } 3859 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3860 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3861 } 3862 } else { 3863 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3864 iscol_s = iscol; 3865 } 3866 3867 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3868 3869 if (!rsorted || !csorted) { 3870 Mat new_mat; 3871 IS is_perm_r,is_perm_c; 3872 3873 if (!rsorted) { 3874 PetscInt *idxs_r,i; 3875 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3876 for (i=0;i<rsize;i++) { 3877 idxs_r[idxs_perm_r[i]] = i; 3878 } 3879 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3880 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3881 } else { 3882 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3883 } 3884 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3885 3886 if (!csorted) { 3887 if (isrow_s == iscol_s) { 3888 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3889 is_perm_c = is_perm_r; 3890 } else { 3891 PetscInt *idxs_c,i; 3892 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3893 for (i=0;i<csize;i++) { 3894 idxs_c[idxs_perm_c[i]] = i; 3895 } 3896 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3897 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3898 } 3899 } else { 3900 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3901 } 3902 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3903 3904 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3905 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3906 work_mat[0] = new_mat; 3907 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3908 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3909 } 3910 3911 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3912 *B = work_mat[0]; 3913 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3914 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3915 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3916 PetscFunctionReturn(0); 3917 } 3918 3919 #undef __FUNCT__ 3920 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3921 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3922 { 3923 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3924 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3925 Mat new_mat; 3926 IS is_local,is_global; 3927 PetscInt local_size; 3928 PetscBool isseqaij; 3929 PetscErrorCode ierr; 3930 3931 PetscFunctionBegin; 3932 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3933 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3934 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3935 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3936 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3937 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3938 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3939 3940 /* check */ 3941 if (pcbddc->dbg_flag) { 3942 Vec x,x_change; 3943 PetscReal error; 3944 3945 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3946 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3947 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3948 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3949 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3950 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3951 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3952 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3953 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3954 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3955 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3956 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3957 ierr = VecDestroy(&x);CHKERRQ(ierr); 3958 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3959 } 3960 3961 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3962 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3963 if (isseqaij) { 3964 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3965 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3966 } else { 3967 Mat work_mat; 3968 3969 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3970 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3971 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3972 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3973 } 3974 if (matis->A->symmetric_set) { 3975 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3976 #if !defined(PETSC_USE_COMPLEX) 3977 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3978 #endif 3979 } 3980 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3981 PetscFunctionReturn(0); 3982 } 3983 3984 #undef __FUNCT__ 3985 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3986 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3987 { 3988 PC_IS* pcis = (PC_IS*)(pc->data); 3989 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3990 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3991 PetscInt *idx_R_local=NULL; 3992 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3993 PetscInt vbs,bs; 3994 PetscBT bitmask=NULL; 3995 PetscErrorCode ierr; 3996 3997 PetscFunctionBegin; 3998 /* 3999 No need to setup local scatters if 4000 - primal space is unchanged 4001 AND 4002 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4003 AND 4004 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4005 */ 4006 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4007 PetscFunctionReturn(0); 4008 } 4009 /* destroy old objects */ 4010 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4011 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4012 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4013 /* Set Non-overlapping dimensions */ 4014 n_B = pcis->n_B; 4015 n_D = pcis->n - n_B; 4016 n_vertices = pcbddc->n_vertices; 4017 4018 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4019 4020 /* create auxiliary bitmask and allocate workspace */ 4021 if (!sub_schurs || !sub_schurs->reuse_solver) { 4022 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4023 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4024 for (i=0;i<n_vertices;i++) { 4025 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4026 } 4027 4028 for (i=0, n_R=0; i<pcis->n; i++) { 4029 if (!PetscBTLookup(bitmask,i)) { 4030 idx_R_local[n_R++] = i; 4031 } 4032 } 4033 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4034 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4035 4036 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4037 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4038 } 4039 4040 /* Block code */ 4041 vbs = 1; 4042 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4043 if (bs>1 && !(n_vertices%bs)) { 4044 PetscBool is_blocked = PETSC_TRUE; 4045 PetscInt *vary; 4046 if (!sub_schurs || !sub_schurs->reuse_solver) { 4047 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4048 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4049 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4050 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4051 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4052 for (i=0; i<pcis->n/bs; i++) { 4053 if (vary[i]!=0 && vary[i]!=bs) { 4054 is_blocked = PETSC_FALSE; 4055 break; 4056 } 4057 } 4058 ierr = PetscFree(vary);CHKERRQ(ierr); 4059 } else { 4060 /* Verify directly the R set */ 4061 for (i=0; i<n_R/bs; i++) { 4062 PetscInt j,node=idx_R_local[bs*i]; 4063 for (j=1; j<bs; j++) { 4064 if (node != idx_R_local[bs*i+j]-j) { 4065 is_blocked = PETSC_FALSE; 4066 break; 4067 } 4068 } 4069 } 4070 } 4071 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4072 vbs = bs; 4073 for (i=0;i<n_R/vbs;i++) { 4074 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4075 } 4076 } 4077 } 4078 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4079 if (sub_schurs && sub_schurs->reuse_solver) { 4080 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4081 4082 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4083 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4084 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4085 reuse_solver->is_R = pcbddc->is_R_local; 4086 } else { 4087 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4088 } 4089 4090 /* print some info if requested */ 4091 if (pcbddc->dbg_flag) { 4092 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4093 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4094 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4095 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4096 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4099 } 4100 4101 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4102 if (!sub_schurs || !sub_schurs->reuse_solver) { 4103 IS is_aux1,is_aux2; 4104 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4105 4106 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4107 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4108 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4109 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4110 for (i=0; i<n_D; i++) { 4111 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4112 } 4113 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4114 for (i=0, j=0; i<n_R; i++) { 4115 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4116 aux_array1[j++] = i; 4117 } 4118 } 4119 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4120 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4121 for (i=0, j=0; i<n_B; i++) { 4122 if (!PetscBTLookup(bitmask,is_indices[i])) { 4123 aux_array2[j++] = i; 4124 } 4125 } 4126 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4127 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4128 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4129 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4130 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4131 4132 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4133 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4134 for (i=0, j=0; i<n_R; i++) { 4135 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4136 aux_array1[j++] = i; 4137 } 4138 } 4139 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4140 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4141 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4142 } 4143 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4144 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4145 } else { 4146 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4147 IS tis; 4148 PetscInt schur_size; 4149 4150 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4151 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4152 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4153 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4154 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4155 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4156 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4157 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4158 } 4159 } 4160 PetscFunctionReturn(0); 4161 } 4162 4163 4164 #undef __FUNCT__ 4165 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4166 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4167 { 4168 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4169 PC_IS *pcis = (PC_IS*)pc->data; 4170 PC pc_temp; 4171 Mat A_RR; 4172 MatReuse reuse; 4173 PetscScalar m_one = -1.0; 4174 PetscReal value; 4175 PetscInt n_D,n_R; 4176 PetscBool check_corr[2],issbaij; 4177 PetscErrorCode ierr; 4178 /* prefixes stuff */ 4179 char dir_prefix[256],neu_prefix[256],str_level[16]; 4180 size_t len; 4181 4182 PetscFunctionBegin; 4183 4184 /* compute prefixes */ 4185 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4186 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4187 if (!pcbddc->current_level) { 4188 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4189 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4190 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4191 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4192 } else { 4193 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4194 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4195 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4196 len -= 15; /* remove "pc_bddc_coarse_" */ 4197 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4198 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4199 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4200 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4201 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4202 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4203 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4204 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4205 } 4206 4207 /* DIRICHLET PROBLEM */ 4208 if (dirichlet) { 4209 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4210 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4211 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4212 if (pcbddc->dbg_flag) { 4213 Mat A_IIn; 4214 4215 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4216 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4217 pcis->A_II = A_IIn; 4218 } 4219 } 4220 if (pcbddc->local_mat->symmetric_set) { 4221 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4222 } 4223 /* Matrix for Dirichlet problem is pcis->A_II */ 4224 n_D = pcis->n - pcis->n_B; 4225 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4226 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4227 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4228 /* default */ 4229 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4230 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4231 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4232 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4233 if (issbaij) { 4234 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4235 } else { 4236 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4237 } 4238 /* Allow user's customization */ 4239 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4240 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4241 } 4242 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4243 if (sub_schurs && sub_schurs->reuse_solver) { 4244 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4245 4246 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4247 } 4248 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4249 if (!n_D) { 4250 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4251 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4252 } 4253 /* Set Up KSP for Dirichlet problem of BDDC */ 4254 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4255 /* set ksp_D into pcis data */ 4256 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4257 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4258 pcis->ksp_D = pcbddc->ksp_D; 4259 } 4260 4261 /* NEUMANN PROBLEM */ 4262 A_RR = 0; 4263 if (neumann) { 4264 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4265 PetscInt ibs,mbs; 4266 PetscBool issbaij; 4267 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4268 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4269 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4270 if (pcbddc->ksp_R) { /* already created ksp */ 4271 PetscInt nn_R; 4272 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4273 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4274 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4275 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4276 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4277 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4278 reuse = MAT_INITIAL_MATRIX; 4279 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4280 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4281 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4282 reuse = MAT_INITIAL_MATRIX; 4283 } else { /* safe to reuse the matrix */ 4284 reuse = MAT_REUSE_MATRIX; 4285 } 4286 } 4287 /* last check */ 4288 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4289 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4290 reuse = MAT_INITIAL_MATRIX; 4291 } 4292 } else { /* first time, so we need to create the matrix */ 4293 reuse = MAT_INITIAL_MATRIX; 4294 } 4295 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4296 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4297 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4298 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4299 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4300 if (matis->A == pcbddc->local_mat) { 4301 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4302 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4303 } else { 4304 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4305 } 4306 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4307 if (matis->A == pcbddc->local_mat) { 4308 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4309 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4310 } else { 4311 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4312 } 4313 } 4314 /* extract A_RR */ 4315 if (sub_schurs && sub_schurs->reuse_solver) { 4316 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4317 4318 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4319 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4320 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4321 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4322 } else { 4323 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4324 } 4325 } else { 4326 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4327 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4328 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4329 } 4330 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4331 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4332 } 4333 if (pcbddc->local_mat->symmetric_set) { 4334 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4335 } 4336 if (!pcbddc->ksp_R) { /* create object if not present */ 4337 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4338 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4339 /* default */ 4340 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4341 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4342 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4343 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4344 if (issbaij) { 4345 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4346 } else { 4347 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4348 } 4349 /* Allow user's customization */ 4350 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4351 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4352 } 4353 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4354 if (!n_R) { 4355 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4356 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4357 } 4358 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4359 /* Reuse solver if it is present */ 4360 if (sub_schurs && sub_schurs->reuse_solver) { 4361 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4362 4363 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4364 } 4365 /* Set Up KSP for Neumann problem of BDDC */ 4366 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4367 } 4368 4369 if (pcbddc->dbg_flag) { 4370 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4371 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4372 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4373 } 4374 4375 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4376 check_corr[0] = check_corr[1] = PETSC_FALSE; 4377 if (pcbddc->NullSpace_corr[0]) { 4378 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4379 } 4380 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4381 check_corr[0] = PETSC_TRUE; 4382 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4383 } 4384 if (neumann && pcbddc->NullSpace_corr[2]) { 4385 check_corr[1] = PETSC_TRUE; 4386 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4387 } 4388 4389 /* check Dirichlet and Neumann solvers */ 4390 if (pcbddc->dbg_flag) { 4391 if (dirichlet) { /* Dirichlet */ 4392 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4393 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4394 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4395 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4396 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4398 if (check_corr[0]) { 4399 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4400 } 4401 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4402 } 4403 if (neumann) { /* Neumann */ 4404 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4405 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4406 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4407 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4408 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4409 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4410 if (check_corr[1]) { 4411 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4412 } 4413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4414 } 4415 } 4416 /* free Neumann problem's matrix */ 4417 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4418 PetscFunctionReturn(0); 4419 } 4420 4421 #undef __FUNCT__ 4422 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4423 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4424 { 4425 PetscErrorCode ierr; 4426 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4427 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4428 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4429 4430 PetscFunctionBegin; 4431 if (!reuse_solver) { 4432 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4433 } 4434 if (!pcbddc->switch_static) { 4435 if (applytranspose && pcbddc->local_auxmat1) { 4436 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4437 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4438 } 4439 if (!reuse_solver) { 4440 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4441 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4442 } else { 4443 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4444 4445 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4446 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4447 } 4448 } else { 4449 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4450 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4451 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4452 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4453 if (applytranspose && pcbddc->local_auxmat1) { 4454 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4455 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4456 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4457 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4458 } 4459 } 4460 if (!reuse_solver || pcbddc->switch_static) { 4461 if (applytranspose) { 4462 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4463 } else { 4464 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4465 } 4466 } else { 4467 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4468 4469 if (applytranspose) { 4470 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4471 } else { 4472 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4473 } 4474 } 4475 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4476 if (!pcbddc->switch_static) { 4477 if (!reuse_solver) { 4478 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4479 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4480 } else { 4481 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4482 4483 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4484 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4485 } 4486 if (!applytranspose && pcbddc->local_auxmat1) { 4487 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4488 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4489 } 4490 } else { 4491 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4492 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4493 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4494 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4495 if (!applytranspose && pcbddc->local_auxmat1) { 4496 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4497 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4498 } 4499 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4500 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4501 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4502 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4503 } 4504 PetscFunctionReturn(0); 4505 } 4506 4507 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4508 #undef __FUNCT__ 4509 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4510 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4511 { 4512 PetscErrorCode ierr; 4513 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4514 PC_IS* pcis = (PC_IS*) (pc->data); 4515 const PetscScalar zero = 0.0; 4516 4517 PetscFunctionBegin; 4518 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4519 if (!pcbddc->benign_apply_coarse_only) { 4520 if (applytranspose) { 4521 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4522 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4523 } else { 4524 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4525 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4526 } 4527 } else { 4528 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4529 } 4530 4531 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4532 if (pcbddc->benign_n) { 4533 PetscScalar *array; 4534 PetscInt j; 4535 4536 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4537 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4538 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4539 } 4540 4541 /* start communications from local primal nodes to rhs of coarse solver */ 4542 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4543 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4544 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4545 4546 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4547 if (pcbddc->coarse_ksp) { 4548 Mat coarse_mat; 4549 Vec rhs,sol; 4550 MatNullSpace nullsp; 4551 PetscBool isbddc = PETSC_FALSE; 4552 4553 if (pcbddc->benign_have_null) { 4554 PC coarse_pc; 4555 4556 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4557 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4558 /* we need to propagate to coarser levels the need for a possible benign correction */ 4559 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4560 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4561 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4562 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4563 } 4564 } 4565 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4566 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4567 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4568 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4569 if (nullsp) { 4570 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4571 } 4572 if (applytranspose) { 4573 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4574 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4575 } else { 4576 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4577 PC coarse_pc; 4578 4579 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4580 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4581 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4582 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4583 } else { 4584 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4585 } 4586 } 4587 /* we don't need the benign correction at coarser levels anymore */ 4588 if (pcbddc->benign_have_null && isbddc) { 4589 PC coarse_pc; 4590 PC_BDDC* coarsepcbddc; 4591 4592 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4593 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4594 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4595 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4596 } 4597 if (nullsp) { 4598 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4599 } 4600 } 4601 4602 /* Local solution on R nodes */ 4603 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4604 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4605 } 4606 /* communications from coarse sol to local primal nodes */ 4607 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4608 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4609 4610 /* Sum contributions from the two levels */ 4611 if (!pcbddc->benign_apply_coarse_only) { 4612 if (applytranspose) { 4613 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4614 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4615 } else { 4616 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4617 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4618 } 4619 /* store p0 */ 4620 if (pcbddc->benign_n) { 4621 PetscScalar *array; 4622 PetscInt j; 4623 4624 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4625 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4626 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4627 } 4628 } else { /* expand the coarse solution */ 4629 if (applytranspose) { 4630 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4631 } else { 4632 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4633 } 4634 } 4635 PetscFunctionReturn(0); 4636 } 4637 4638 #undef __FUNCT__ 4639 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4640 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4641 { 4642 PetscErrorCode ierr; 4643 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4644 PetscScalar *array; 4645 Vec from,to; 4646 4647 PetscFunctionBegin; 4648 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4649 from = pcbddc->coarse_vec; 4650 to = pcbddc->vec1_P; 4651 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4652 Vec tvec; 4653 4654 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4655 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4656 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4657 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4658 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4659 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4660 } 4661 } else { /* from local to global -> put data in coarse right hand side */ 4662 from = pcbddc->vec1_P; 4663 to = pcbddc->coarse_vec; 4664 } 4665 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4666 PetscFunctionReturn(0); 4667 } 4668 4669 #undef __FUNCT__ 4670 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4671 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4672 { 4673 PetscErrorCode ierr; 4674 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4675 PetscScalar *array; 4676 Vec from,to; 4677 4678 PetscFunctionBegin; 4679 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4680 from = pcbddc->coarse_vec; 4681 to = pcbddc->vec1_P; 4682 } else { /* from local to global -> put data in coarse right hand side */ 4683 from = pcbddc->vec1_P; 4684 to = pcbddc->coarse_vec; 4685 } 4686 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4687 if (smode == SCATTER_FORWARD) { 4688 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4689 Vec tvec; 4690 4691 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4692 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4693 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4694 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4695 } 4696 } else { 4697 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4698 ierr = VecResetArray(from);CHKERRQ(ierr); 4699 } 4700 } 4701 PetscFunctionReturn(0); 4702 } 4703 4704 /* uncomment for testing purposes */ 4705 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4706 #undef __FUNCT__ 4707 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4708 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4709 { 4710 PetscErrorCode ierr; 4711 PC_IS* pcis = (PC_IS*)(pc->data); 4712 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4713 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4714 /* one and zero */ 4715 PetscScalar one=1.0,zero=0.0; 4716 /* space to store constraints and their local indices */ 4717 PetscScalar *constraints_data; 4718 PetscInt *constraints_idxs,*constraints_idxs_B; 4719 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4720 PetscInt *constraints_n; 4721 /* iterators */ 4722 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4723 /* BLAS integers */ 4724 PetscBLASInt lwork,lierr; 4725 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4726 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4727 /* reuse */ 4728 PetscInt olocal_primal_size,olocal_primal_size_cc; 4729 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4730 /* change of basis */ 4731 PetscBool qr_needed; 4732 PetscBT change_basis,qr_needed_idx; 4733 /* auxiliary stuff */ 4734 PetscInt *nnz,*is_indices; 4735 PetscInt ncc; 4736 /* some quantities */ 4737 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4738 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4739 4740 PetscFunctionBegin; 4741 /* Destroy Mat objects computed previously */ 4742 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4743 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4744 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4745 /* save info on constraints from previous setup (if any) */ 4746 olocal_primal_size = pcbddc->local_primal_size; 4747 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4748 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4749 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4750 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4751 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4752 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4753 4754 if (!pcbddc->adaptive_selection) { 4755 IS ISForVertices,*ISForFaces,*ISForEdges; 4756 MatNullSpace nearnullsp; 4757 const Vec *nearnullvecs; 4758 Vec *localnearnullsp; 4759 PetscScalar *array; 4760 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4761 PetscBool nnsp_has_cnst; 4762 /* LAPACK working arrays for SVD or POD */ 4763 PetscBool skip_lapack,boolforchange; 4764 PetscScalar *work; 4765 PetscReal *singular_vals; 4766 #if defined(PETSC_USE_COMPLEX) 4767 PetscReal *rwork; 4768 #endif 4769 #if defined(PETSC_MISSING_LAPACK_GESVD) 4770 PetscScalar *temp_basis,*correlation_mat; 4771 #else 4772 PetscBLASInt dummy_int=1; 4773 PetscScalar dummy_scalar=1.; 4774 #endif 4775 4776 /* Get index sets for faces, edges and vertices from graph */ 4777 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4778 /* print some info */ 4779 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4780 PetscInt nv; 4781 4782 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4783 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4784 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4785 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4786 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4787 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4789 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4790 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4791 } 4792 4793 /* free unneeded index sets */ 4794 if (!pcbddc->use_vertices) { 4795 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4796 } 4797 if (!pcbddc->use_edges) { 4798 for (i=0;i<n_ISForEdges;i++) { 4799 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4800 } 4801 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4802 n_ISForEdges = 0; 4803 } 4804 if (!pcbddc->use_faces) { 4805 for (i=0;i<n_ISForFaces;i++) { 4806 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4807 } 4808 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4809 n_ISForFaces = 0; 4810 } 4811 4812 /* check if near null space is attached to global mat */ 4813 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4814 if (nearnullsp) { 4815 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4816 /* remove any stored info */ 4817 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4818 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4819 /* store information for BDDC solver reuse */ 4820 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4821 pcbddc->onearnullspace = nearnullsp; 4822 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4823 for (i=0;i<nnsp_size;i++) { 4824 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4825 } 4826 } else { /* if near null space is not provided BDDC uses constants by default */ 4827 nnsp_size = 0; 4828 nnsp_has_cnst = PETSC_TRUE; 4829 } 4830 /* get max number of constraints on a single cc */ 4831 max_constraints = nnsp_size; 4832 if (nnsp_has_cnst) max_constraints++; 4833 4834 /* 4835 Evaluate maximum storage size needed by the procedure 4836 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4837 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4838 There can be multiple constraints per connected component 4839 */ 4840 n_vertices = 0; 4841 if (ISForVertices) { 4842 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4843 } 4844 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4845 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4846 4847 total_counts = n_ISForFaces+n_ISForEdges; 4848 total_counts *= max_constraints; 4849 total_counts += n_vertices; 4850 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4851 4852 total_counts = 0; 4853 max_size_of_constraint = 0; 4854 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4855 IS used_is; 4856 if (i<n_ISForEdges) { 4857 used_is = ISForEdges[i]; 4858 } else { 4859 used_is = ISForFaces[i-n_ISForEdges]; 4860 } 4861 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4862 total_counts += j; 4863 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4864 } 4865 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4866 4867 /* get local part of global near null space vectors */ 4868 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4869 for (k=0;k<nnsp_size;k++) { 4870 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4871 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4872 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4873 } 4874 4875 /* whether or not to skip lapack calls */ 4876 skip_lapack = PETSC_TRUE; 4877 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4878 4879 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4880 if (!skip_lapack) { 4881 PetscScalar temp_work; 4882 4883 #if defined(PETSC_MISSING_LAPACK_GESVD) 4884 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4885 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4886 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4887 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4888 #if defined(PETSC_USE_COMPLEX) 4889 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4890 #endif 4891 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4892 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4893 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4894 lwork = -1; 4895 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4896 #if !defined(PETSC_USE_COMPLEX) 4897 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4898 #else 4899 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4900 #endif 4901 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4902 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4903 #else /* on missing GESVD */ 4904 /* SVD */ 4905 PetscInt max_n,min_n; 4906 max_n = max_size_of_constraint; 4907 min_n = max_constraints; 4908 if (max_size_of_constraint < max_constraints) { 4909 min_n = max_size_of_constraint; 4910 max_n = max_constraints; 4911 } 4912 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4913 #if defined(PETSC_USE_COMPLEX) 4914 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4915 #endif 4916 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4917 lwork = -1; 4918 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4919 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4920 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4921 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4922 #if !defined(PETSC_USE_COMPLEX) 4923 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4924 #else 4925 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4926 #endif 4927 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4928 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4929 #endif /* on missing GESVD */ 4930 /* Allocate optimal workspace */ 4931 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4932 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4933 } 4934 /* Now we can loop on constraining sets */ 4935 total_counts = 0; 4936 constraints_idxs_ptr[0] = 0; 4937 constraints_data_ptr[0] = 0; 4938 /* vertices */ 4939 if (n_vertices) { 4940 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4941 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4942 for (i=0;i<n_vertices;i++) { 4943 constraints_n[total_counts] = 1; 4944 constraints_data[total_counts] = 1.0; 4945 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4946 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4947 total_counts++; 4948 } 4949 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4950 n_vertices = total_counts; 4951 } 4952 4953 /* edges and faces */ 4954 total_counts_cc = total_counts; 4955 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4956 IS used_is; 4957 PetscBool idxs_copied = PETSC_FALSE; 4958 4959 if (ncc<n_ISForEdges) { 4960 used_is = ISForEdges[ncc]; 4961 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4962 } else { 4963 used_is = ISForFaces[ncc-n_ISForEdges]; 4964 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4965 } 4966 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4967 4968 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4969 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4970 /* change of basis should not be performed on local periodic nodes */ 4971 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4972 if (nnsp_has_cnst) { 4973 PetscScalar quad_value; 4974 4975 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4976 idxs_copied = PETSC_TRUE; 4977 4978 if (!pcbddc->use_nnsp_true) { 4979 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4980 } else { 4981 quad_value = 1.0; 4982 } 4983 for (j=0;j<size_of_constraint;j++) { 4984 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4985 } 4986 temp_constraints++; 4987 total_counts++; 4988 } 4989 for (k=0;k<nnsp_size;k++) { 4990 PetscReal real_value; 4991 PetscScalar *ptr_to_data; 4992 4993 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4994 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4995 for (j=0;j<size_of_constraint;j++) { 4996 ptr_to_data[j] = array[is_indices[j]]; 4997 } 4998 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4999 /* check if array is null on the connected component */ 5000 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5001 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5002 if (real_value > 0.0) { /* keep indices and values */ 5003 temp_constraints++; 5004 total_counts++; 5005 if (!idxs_copied) { 5006 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5007 idxs_copied = PETSC_TRUE; 5008 } 5009 } 5010 } 5011 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5012 valid_constraints = temp_constraints; 5013 if (!pcbddc->use_nnsp_true && temp_constraints) { 5014 if (temp_constraints == 1) { /* just normalize the constraint */ 5015 PetscScalar norm,*ptr_to_data; 5016 5017 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5018 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5019 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5020 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5021 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5022 } else { /* perform SVD */ 5023 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5024 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5025 5026 #if defined(PETSC_MISSING_LAPACK_GESVD) 5027 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5028 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5029 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5030 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5031 from that computed using LAPACKgesvd 5032 -> This is due to a different computation of eigenvectors in LAPACKheev 5033 -> The quality of the POD-computed basis will be the same */ 5034 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5035 /* Store upper triangular part of correlation matrix */ 5036 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5037 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5038 for (j=0;j<temp_constraints;j++) { 5039 for (k=0;k<j+1;k++) { 5040 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5041 } 5042 } 5043 /* compute eigenvalues and eigenvectors of correlation matrix */ 5044 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5045 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5046 #if !defined(PETSC_USE_COMPLEX) 5047 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5048 #else 5049 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5050 #endif 5051 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5052 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5053 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5054 j = 0; 5055 while (j < temp_constraints && singular_vals[j] < tol) j++; 5056 total_counts = total_counts-j; 5057 valid_constraints = temp_constraints-j; 5058 /* scale and copy POD basis into used quadrature memory */ 5059 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5060 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5061 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5062 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5063 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5064 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5065 if (j<temp_constraints) { 5066 PetscInt ii; 5067 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5068 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5069 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5070 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5071 for (k=0;k<temp_constraints-j;k++) { 5072 for (ii=0;ii<size_of_constraint;ii++) { 5073 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5074 } 5075 } 5076 } 5077 #else /* on missing GESVD */ 5078 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5079 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5081 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5082 #if !defined(PETSC_USE_COMPLEX) 5083 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5084 #else 5085 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5086 #endif 5087 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5088 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5089 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5090 k = temp_constraints; 5091 if (k > size_of_constraint) k = size_of_constraint; 5092 j = 0; 5093 while (j < k && singular_vals[k-j-1] < tol) j++; 5094 valid_constraints = k-j; 5095 total_counts = total_counts-temp_constraints+valid_constraints; 5096 #endif /* on missing GESVD */ 5097 } 5098 } 5099 /* update pointers information */ 5100 if (valid_constraints) { 5101 constraints_n[total_counts_cc] = valid_constraints; 5102 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5103 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5104 /* set change_of_basis flag */ 5105 if (boolforchange) { 5106 PetscBTSet(change_basis,total_counts_cc); 5107 } 5108 total_counts_cc++; 5109 } 5110 } 5111 /* free workspace */ 5112 if (!skip_lapack) { 5113 ierr = PetscFree(work);CHKERRQ(ierr); 5114 #if defined(PETSC_USE_COMPLEX) 5115 ierr = PetscFree(rwork);CHKERRQ(ierr); 5116 #endif 5117 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5118 #if defined(PETSC_MISSING_LAPACK_GESVD) 5119 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5120 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5121 #endif 5122 } 5123 for (k=0;k<nnsp_size;k++) { 5124 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5125 } 5126 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5127 /* free index sets of faces, edges and vertices */ 5128 for (i=0;i<n_ISForFaces;i++) { 5129 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5130 } 5131 if (n_ISForFaces) { 5132 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5133 } 5134 for (i=0;i<n_ISForEdges;i++) { 5135 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5136 } 5137 if (n_ISForEdges) { 5138 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5139 } 5140 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5141 } else { 5142 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5143 5144 total_counts = 0; 5145 n_vertices = 0; 5146 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5147 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5148 } 5149 max_constraints = 0; 5150 total_counts_cc = 0; 5151 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5152 total_counts += pcbddc->adaptive_constraints_n[i]; 5153 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5154 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5155 } 5156 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5157 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5158 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5159 constraints_data = pcbddc->adaptive_constraints_data; 5160 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5161 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5162 total_counts_cc = 0; 5163 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5164 if (pcbddc->adaptive_constraints_n[i]) { 5165 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5166 } 5167 } 5168 #if 0 5169 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5170 for (i=0;i<total_counts_cc;i++) { 5171 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5172 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5173 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5174 printf(" %d",constraints_idxs[j]); 5175 } 5176 printf("\n"); 5177 printf("number of cc: %d\n",constraints_n[i]); 5178 } 5179 for (i=0;i<n_vertices;i++) { 5180 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5181 } 5182 for (i=0;i<sub_schurs->n_subs;i++) { 5183 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5184 } 5185 #endif 5186 5187 max_size_of_constraint = 0; 5188 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5189 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5190 /* Change of basis */ 5191 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5192 if (pcbddc->use_change_of_basis) { 5193 for (i=0;i<sub_schurs->n_subs;i++) { 5194 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5195 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5196 } 5197 } 5198 } 5199 } 5200 pcbddc->local_primal_size = total_counts; 5201 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5202 5203 /* map constraints_idxs in boundary numbering */ 5204 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5205 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5206 5207 /* Create constraint matrix */ 5208 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5209 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5210 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5211 5212 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5213 /* determine if a QR strategy is needed for change of basis */ 5214 qr_needed = PETSC_FALSE; 5215 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5216 total_primal_vertices=0; 5217 pcbddc->local_primal_size_cc = 0; 5218 for (i=0;i<total_counts_cc;i++) { 5219 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5220 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5221 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5222 pcbddc->local_primal_size_cc += 1; 5223 } else if (PetscBTLookup(change_basis,i)) { 5224 for (k=0;k<constraints_n[i];k++) { 5225 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5226 } 5227 pcbddc->local_primal_size_cc += constraints_n[i]; 5228 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5229 PetscBTSet(qr_needed_idx,i); 5230 qr_needed = PETSC_TRUE; 5231 } 5232 } else { 5233 pcbddc->local_primal_size_cc += 1; 5234 } 5235 } 5236 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5237 pcbddc->n_vertices = total_primal_vertices; 5238 /* permute indices in order to have a sorted set of vertices */ 5239 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5240 5241 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5242 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5243 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5244 5245 /* nonzero structure of constraint matrix */ 5246 /* and get reference dof for local constraints */ 5247 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5248 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5249 5250 j = total_primal_vertices; 5251 total_counts = total_primal_vertices; 5252 cum = total_primal_vertices; 5253 for (i=n_vertices;i<total_counts_cc;i++) { 5254 if (!PetscBTLookup(change_basis,i)) { 5255 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5256 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5257 cum++; 5258 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5259 for (k=0;k<constraints_n[i];k++) { 5260 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5261 nnz[j+k] = size_of_constraint; 5262 } 5263 j += constraints_n[i]; 5264 } 5265 } 5266 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5267 ierr = PetscFree(nnz);CHKERRQ(ierr); 5268 5269 /* set values in constraint matrix */ 5270 for (i=0;i<total_primal_vertices;i++) { 5271 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5272 } 5273 total_counts = total_primal_vertices; 5274 for (i=n_vertices;i<total_counts_cc;i++) { 5275 if (!PetscBTLookup(change_basis,i)) { 5276 PetscInt *cols; 5277 5278 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5279 cols = constraints_idxs+constraints_idxs_ptr[i]; 5280 for (k=0;k<constraints_n[i];k++) { 5281 PetscInt row = total_counts+k; 5282 PetscScalar *vals; 5283 5284 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5285 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5286 } 5287 total_counts += constraints_n[i]; 5288 } 5289 } 5290 /* assembling */ 5291 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5292 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5293 5294 /* 5295 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5296 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5297 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5298 */ 5299 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5300 if (pcbddc->use_change_of_basis) { 5301 /* dual and primal dofs on a single cc */ 5302 PetscInt dual_dofs,primal_dofs; 5303 /* working stuff for GEQRF */ 5304 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5305 PetscBLASInt lqr_work; 5306 /* working stuff for UNGQR */ 5307 PetscScalar *gqr_work,lgqr_work_t; 5308 PetscBLASInt lgqr_work; 5309 /* working stuff for TRTRS */ 5310 PetscScalar *trs_rhs; 5311 PetscBLASInt Blas_NRHS; 5312 /* pointers for values insertion into change of basis matrix */ 5313 PetscInt *start_rows,*start_cols; 5314 PetscScalar *start_vals; 5315 /* working stuff for values insertion */ 5316 PetscBT is_primal; 5317 PetscInt *aux_primal_numbering_B; 5318 /* matrix sizes */ 5319 PetscInt global_size,local_size; 5320 /* temporary change of basis */ 5321 Mat localChangeOfBasisMatrix; 5322 /* extra space for debugging */ 5323 PetscScalar *dbg_work; 5324 5325 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5326 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5327 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5328 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5329 /* nonzeros for local mat */ 5330 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5331 if (!pcbddc->benign_change || pcbddc->fake_change) { 5332 for (i=0;i<pcis->n;i++) nnz[i]=1; 5333 } else { 5334 const PetscInt *ii; 5335 PetscInt n; 5336 PetscBool flg_row; 5337 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5338 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5339 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5340 } 5341 for (i=n_vertices;i<total_counts_cc;i++) { 5342 if (PetscBTLookup(change_basis,i)) { 5343 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5344 if (PetscBTLookup(qr_needed_idx,i)) { 5345 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5346 } else { 5347 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5348 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5349 } 5350 } 5351 } 5352 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5353 ierr = PetscFree(nnz);CHKERRQ(ierr); 5354 /* Set interior change in the matrix */ 5355 if (!pcbddc->benign_change || pcbddc->fake_change) { 5356 for (i=0;i<pcis->n;i++) { 5357 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5358 } 5359 } else { 5360 const PetscInt *ii,*jj; 5361 PetscScalar *aa; 5362 PetscInt n; 5363 PetscBool flg_row; 5364 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5365 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5366 for (i=0;i<n;i++) { 5367 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5368 } 5369 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5370 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5371 } 5372 5373 if (pcbddc->dbg_flag) { 5374 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5375 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5376 } 5377 5378 5379 /* Now we loop on the constraints which need a change of basis */ 5380 /* 5381 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5382 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5383 5384 Basic blocks of change of basis matrix T computed by 5385 5386 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5387 5388 | 1 0 ... 0 s_1/S | 5389 | 0 1 ... 0 s_2/S | 5390 | ... | 5391 | 0 ... 1 s_{n-1}/S | 5392 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5393 5394 with S = \sum_{i=1}^n s_i^2 5395 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5396 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5397 5398 - QR decomposition of constraints otherwise 5399 */ 5400 if (qr_needed) { 5401 /* space to store Q */ 5402 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5403 /* array to store scaling factors for reflectors */ 5404 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5405 /* first we issue queries for optimal work */ 5406 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5407 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5408 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5409 lqr_work = -1; 5410 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5411 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5412 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5413 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5414 lgqr_work = -1; 5415 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5416 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5417 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5418 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5419 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5420 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5421 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5422 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5423 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5424 /* array to store rhs and solution of triangular solver */ 5425 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5426 /* allocating workspace for check */ 5427 if (pcbddc->dbg_flag) { 5428 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5429 } 5430 } 5431 /* array to store whether a node is primal or not */ 5432 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5433 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5434 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5435 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5436 for (i=0;i<total_primal_vertices;i++) { 5437 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5438 } 5439 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5440 5441 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5442 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5443 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5444 if (PetscBTLookup(change_basis,total_counts)) { 5445 /* get constraint info */ 5446 primal_dofs = constraints_n[total_counts]; 5447 dual_dofs = size_of_constraint-primal_dofs; 5448 5449 if (pcbddc->dbg_flag) { 5450 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5451 } 5452 5453 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5454 5455 /* copy quadrature constraints for change of basis check */ 5456 if (pcbddc->dbg_flag) { 5457 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5458 } 5459 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5460 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5461 5462 /* compute QR decomposition of constraints */ 5463 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5464 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5465 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5466 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5467 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5468 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5469 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5470 5471 /* explictly compute R^-T */ 5472 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5473 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5474 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5475 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5476 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5477 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5478 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5479 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5480 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5481 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5482 5483 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5484 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5485 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5486 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5487 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5488 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5489 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5490 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5491 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5492 5493 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5494 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5495 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5496 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5497 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5498 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5499 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5500 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5501 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5502 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5503 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5504 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5505 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5506 5507 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5508 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5509 /* insert cols for primal dofs */ 5510 for (j=0;j<primal_dofs;j++) { 5511 start_vals = &qr_basis[j*size_of_constraint]; 5512 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5513 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5514 } 5515 /* insert cols for dual dofs */ 5516 for (j=0,k=0;j<dual_dofs;k++) { 5517 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5518 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5519 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5520 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5521 j++; 5522 } 5523 } 5524 5525 /* check change of basis */ 5526 if (pcbddc->dbg_flag) { 5527 PetscInt ii,jj; 5528 PetscBool valid_qr=PETSC_TRUE; 5529 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5531 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5534 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5535 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5536 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5537 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5538 for (jj=0;jj<size_of_constraint;jj++) { 5539 for (ii=0;ii<primal_dofs;ii++) { 5540 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5541 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5542 } 5543 } 5544 if (!valid_qr) { 5545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5546 for (jj=0;jj<size_of_constraint;jj++) { 5547 for (ii=0;ii<primal_dofs;ii++) { 5548 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5549 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5550 } 5551 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5552 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5553 } 5554 } 5555 } 5556 } else { 5557 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5558 } 5559 } 5560 } else { /* simple transformation block */ 5561 PetscInt row,col; 5562 PetscScalar val,norm; 5563 5564 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5565 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5566 for (j=0;j<size_of_constraint;j++) { 5567 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5568 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5569 if (!PetscBTLookup(is_primal,row_B)) { 5570 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5571 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5572 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5573 } else { 5574 for (k=0;k<size_of_constraint;k++) { 5575 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5576 if (row != col) { 5577 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5578 } else { 5579 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5580 } 5581 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5582 } 5583 } 5584 } 5585 if (pcbddc->dbg_flag) { 5586 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5587 } 5588 } 5589 } else { 5590 if (pcbddc->dbg_flag) { 5591 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5592 } 5593 } 5594 } 5595 5596 /* free workspace */ 5597 if (qr_needed) { 5598 if (pcbddc->dbg_flag) { 5599 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5600 } 5601 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5602 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5603 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5604 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5605 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5606 } 5607 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5608 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5609 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5610 5611 /* assembling of global change of variable */ 5612 if (!pcbddc->fake_change) { 5613 Mat tmat; 5614 PetscInt bs; 5615 5616 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5617 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5618 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5619 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5620 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5621 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5622 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5623 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5624 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5625 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5626 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5627 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5628 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5629 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5630 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5631 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5632 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5633 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5634 5635 /* check */ 5636 if (pcbddc->dbg_flag) { 5637 PetscReal error; 5638 Vec x,x_change; 5639 5640 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5641 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5642 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5643 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5644 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5645 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5646 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5647 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5648 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5649 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5650 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5651 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5652 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5653 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5654 ierr = VecDestroy(&x);CHKERRQ(ierr); 5655 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5656 } 5657 /* adapt sub_schurs computed (if any) */ 5658 if (pcbddc->use_deluxe_scaling) { 5659 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5660 5661 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5662 if (sub_schurs && sub_schurs->S_Ej_all) { 5663 Mat S_new,tmat; 5664 IS is_all_N,is_V_Sall = NULL; 5665 5666 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5667 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5668 if (pcbddc->deluxe_zerorows) { 5669 ISLocalToGlobalMapping NtoSall; 5670 IS is_V; 5671 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5672 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5673 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5674 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5675 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5676 } 5677 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5678 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5679 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5680 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5681 if (pcbddc->deluxe_zerorows) { 5682 const PetscScalar *array; 5683 const PetscInt *idxs_V,*idxs_all; 5684 PetscInt i,n_V; 5685 5686 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5687 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5688 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5689 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5690 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5691 for (i=0;i<n_V;i++) { 5692 PetscScalar val; 5693 PetscInt idx; 5694 5695 idx = idxs_V[i]; 5696 val = array[idxs_all[idxs_V[i]]]; 5697 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5698 } 5699 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5702 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5703 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5704 } 5705 sub_schurs->S_Ej_all = S_new; 5706 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5707 if (sub_schurs->sum_S_Ej_all) { 5708 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5709 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5710 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5711 if (pcbddc->deluxe_zerorows) { 5712 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5713 } 5714 sub_schurs->sum_S_Ej_all = S_new; 5715 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5716 } 5717 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5718 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5719 } 5720 /* destroy any change of basis context in sub_schurs */ 5721 if (sub_schurs && sub_schurs->change) { 5722 PetscInt i; 5723 5724 for (i=0;i<sub_schurs->n_subs;i++) { 5725 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5726 } 5727 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5728 } 5729 } 5730 if (pcbddc->switch_static) { /* need to save the local change */ 5731 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5732 } else { 5733 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5734 } 5735 /* determine if any process has changed the pressures locally */ 5736 pcbddc->change_interior = pcbddc->benign_have_null; 5737 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5738 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5739 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5740 pcbddc->use_qr_single = qr_needed; 5741 } 5742 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5743 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5744 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5745 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5746 } else { 5747 Mat benign_global = NULL; 5748 if (pcbddc->benign_have_null) { 5749 Mat tmat; 5750 5751 pcbddc->change_interior = PETSC_TRUE; 5752 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5753 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5754 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5755 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5756 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5757 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5758 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5759 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5760 if (pcbddc->benign_change) { 5761 Mat M; 5762 5763 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5764 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5765 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5766 ierr = MatDestroy(&M);CHKERRQ(ierr); 5767 } else { 5768 Mat eye; 5769 PetscScalar *array; 5770 5771 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5772 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5773 for (i=0;i<pcis->n;i++) { 5774 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5775 } 5776 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5777 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5778 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5779 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5780 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5781 } 5782 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5783 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5784 } 5785 if (pcbddc->user_ChangeOfBasisMatrix) { 5786 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5787 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5788 } else if (pcbddc->benign_have_null) { 5789 pcbddc->ChangeOfBasisMatrix = benign_global; 5790 } 5791 } 5792 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5793 IS is_global; 5794 const PetscInt *gidxs; 5795 5796 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5797 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5798 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5799 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5800 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5801 } 5802 } 5803 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5804 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5805 } 5806 5807 if (!pcbddc->fake_change) { 5808 /* add pressure dofs to set of primal nodes for numbering purposes */ 5809 for (i=0;i<pcbddc->benign_n;i++) { 5810 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5811 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5812 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5813 pcbddc->local_primal_size_cc++; 5814 pcbddc->local_primal_size++; 5815 } 5816 5817 /* check if a new primal space has been introduced (also take into account benign trick) */ 5818 pcbddc->new_primal_space_local = PETSC_TRUE; 5819 if (olocal_primal_size == pcbddc->local_primal_size) { 5820 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5821 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5822 if (!pcbddc->new_primal_space_local) { 5823 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5824 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5825 } 5826 } 5827 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5828 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5829 } 5830 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5831 5832 /* flush dbg viewer */ 5833 if (pcbddc->dbg_flag) { 5834 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5835 } 5836 5837 /* free workspace */ 5838 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5839 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5840 if (!pcbddc->adaptive_selection) { 5841 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5842 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5843 } else { 5844 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5845 pcbddc->adaptive_constraints_idxs_ptr, 5846 pcbddc->adaptive_constraints_data_ptr, 5847 pcbddc->adaptive_constraints_idxs, 5848 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5849 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5850 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5851 } 5852 PetscFunctionReturn(0); 5853 } 5854 5855 #undef __FUNCT__ 5856 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5857 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5858 { 5859 ISLocalToGlobalMapping map; 5860 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5861 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5862 PetscInt ierr,i,N; 5863 5864 PetscFunctionBegin; 5865 if (pcbddc->graphanalyzed && !pcbddc->recompute_topography) PetscFunctionReturn(0); 5866 /* Reset previously computed graph */ 5867 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5868 /* Init local Graph struct */ 5869 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5870 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5871 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5872 5873 /* Check validity of the csr graph passed in by the user */ 5874 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5875 5876 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5877 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5878 PetscInt *xadj,*adjncy; 5879 PetscInt nvtxs; 5880 PetscBool flg_row=PETSC_FALSE; 5881 5882 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5883 if (flg_row) { 5884 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5885 pcbddc->computed_rowadj = PETSC_TRUE; 5886 } 5887 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5888 } 5889 if (pcbddc->dbg_flag) { 5890 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5891 } 5892 5893 /* Setup of Graph */ 5894 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5895 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5896 5897 /* attach info on disconnected subdomains if present */ 5898 if (pcbddc->n_local_subs) { 5899 PetscInt *local_subs; 5900 5901 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5902 for (i=0;i<pcbddc->n_local_subs;i++) { 5903 const PetscInt *idxs; 5904 PetscInt nl,j; 5905 5906 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5907 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5908 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5909 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5910 } 5911 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5912 pcbddc->mat_graph->local_subs = local_subs; 5913 } 5914 5915 /* Graph's connected components analysis */ 5916 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5917 5918 /* set flag indicating analysis has been done */ 5919 pcbddc->graphanalyzed = PETSC_TRUE; 5920 PetscFunctionReturn(0); 5921 } 5922 5923 #undef __FUNCT__ 5924 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5925 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5926 { 5927 PetscInt i,j; 5928 PetscScalar *alphas; 5929 PetscErrorCode ierr; 5930 5931 PetscFunctionBegin; 5932 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5933 for (i=0;i<n;i++) { 5934 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5935 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5936 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5937 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5938 } 5939 ierr = PetscFree(alphas);CHKERRQ(ierr); 5940 PetscFunctionReturn(0); 5941 } 5942 5943 #undef __FUNCT__ 5944 #define __FUNCT__ "MatISGetSubassemblingPattern" 5945 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5946 { 5947 Mat A; 5948 PetscInt n_neighs,*neighs,*n_shared,**shared; 5949 PetscMPIInt size,rank,color; 5950 PetscInt *xadj,*adjncy; 5951 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5952 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5953 PetscInt void_procs,*procs_candidates = NULL; 5954 PetscInt xadj_count, *count; 5955 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5956 PetscSubcomm psubcomm; 5957 MPI_Comm subcomm; 5958 PetscErrorCode ierr; 5959 5960 PetscFunctionBegin; 5961 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5962 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5963 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5964 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5965 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5966 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5967 5968 if (have_void) *have_void = PETSC_FALSE; 5969 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5970 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5971 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5972 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5973 im_active = !!(n); 5974 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5975 void_procs = size - active_procs; 5976 /* get ranks of of non-active processes in mat communicator */ 5977 if (void_procs) { 5978 PetscInt ncand; 5979 5980 if (have_void) *have_void = PETSC_TRUE; 5981 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5982 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5983 for (i=0,ncand=0;i<size;i++) { 5984 if (!procs_candidates[i]) { 5985 procs_candidates[ncand++] = i; 5986 } 5987 } 5988 /* force n_subdomains to be not greater that the number of non-active processes */ 5989 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5990 } 5991 5992 /* number of subdomains requested greater than active processes -> just shift the matrix 5993 number of subdomains requested 1 -> send to master or first candidate in voids */ 5994 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5995 PetscInt issize,isidx,dest; 5996 if (*n_subdomains == 1) dest = 0; 5997 else dest = rank; 5998 if (im_active) { 5999 issize = 1; 6000 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6001 isidx = procs_candidates[dest]; 6002 } else { 6003 isidx = dest; 6004 } 6005 } else { 6006 issize = 0; 6007 isidx = -1; 6008 } 6009 *n_subdomains = active_procs; 6010 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6011 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6012 PetscFunctionReturn(0); 6013 } 6014 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6015 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6016 threshold = PetscMax(threshold,2); 6017 6018 /* Get info on mapping */ 6019 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6020 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6021 6022 /* build local CSR graph of subdomains' connectivity */ 6023 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6024 xadj[0] = 0; 6025 xadj[1] = PetscMax(n_neighs-1,0); 6026 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6027 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6028 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6029 for (i=1;i<n_neighs;i++) 6030 for (j=0;j<n_shared[i];j++) 6031 count[shared[i][j]] += 1; 6032 6033 xadj_count = 0; 6034 for (i=1;i<n_neighs;i++) { 6035 for (j=0;j<n_shared[i];j++) { 6036 if (count[shared[i][j]] < threshold) { 6037 adjncy[xadj_count] = neighs[i]; 6038 adjncy_wgt[xadj_count] = n_shared[i]; 6039 xadj_count++; 6040 break; 6041 } 6042 } 6043 } 6044 xadj[1] = xadj_count; 6045 ierr = PetscFree(count);CHKERRQ(ierr); 6046 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6047 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6048 6049 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6050 6051 /* Restrict work on active processes only */ 6052 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6053 if (void_procs) { 6054 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6055 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6056 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6057 subcomm = PetscSubcommChild(psubcomm); 6058 } else { 6059 psubcomm = NULL; 6060 subcomm = PetscObjectComm((PetscObject)mat); 6061 } 6062 6063 v_wgt = NULL; 6064 if (!color) { 6065 ierr = PetscFree(xadj);CHKERRQ(ierr); 6066 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6067 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6068 } else { 6069 Mat subdomain_adj; 6070 IS new_ranks,new_ranks_contig; 6071 MatPartitioning partitioner; 6072 PetscInt rstart=0,rend=0; 6073 PetscInt *is_indices,*oldranks; 6074 PetscMPIInt size; 6075 PetscBool aggregate; 6076 6077 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6078 if (void_procs) { 6079 PetscInt prank = rank; 6080 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6081 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6082 for (i=0;i<xadj[1];i++) { 6083 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6084 } 6085 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6086 } else { 6087 oldranks = NULL; 6088 } 6089 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6090 if (aggregate) { /* TODO: all this part could be made more efficient */ 6091 PetscInt lrows,row,ncols,*cols; 6092 PetscMPIInt nrank; 6093 PetscScalar *vals; 6094 6095 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6096 lrows = 0; 6097 if (nrank<redprocs) { 6098 lrows = size/redprocs; 6099 if (nrank<size%redprocs) lrows++; 6100 } 6101 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6102 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6103 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6104 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6105 row = nrank; 6106 ncols = xadj[1]-xadj[0]; 6107 cols = adjncy; 6108 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6109 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6110 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6111 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6112 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6113 ierr = PetscFree(xadj);CHKERRQ(ierr); 6114 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6115 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6116 ierr = PetscFree(vals);CHKERRQ(ierr); 6117 if (use_vwgt) { 6118 Vec v; 6119 const PetscScalar *array; 6120 PetscInt nl; 6121 6122 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6123 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6124 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6125 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6126 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6127 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6128 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6129 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6130 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6131 ierr = VecDestroy(&v);CHKERRQ(ierr); 6132 } 6133 } else { 6134 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6135 if (use_vwgt) { 6136 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6137 v_wgt[0] = local_size; 6138 } 6139 } 6140 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6141 6142 /* Partition */ 6143 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6144 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6145 if (v_wgt) { 6146 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6147 } 6148 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6149 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6150 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6151 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6152 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6153 6154 /* renumber new_ranks to avoid "holes" in new set of processors */ 6155 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6156 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6157 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6158 if (!aggregate) { 6159 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6160 #if defined(PETSC_USE_DEBUG) 6161 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6162 #endif 6163 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6164 } else if (oldranks) { 6165 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6166 } else { 6167 ranks_send_to_idx[0] = is_indices[0]; 6168 } 6169 } else { 6170 PetscInt idxs[1]; 6171 PetscMPIInt tag; 6172 MPI_Request *reqs; 6173 6174 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6175 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6176 for (i=rstart;i<rend;i++) { 6177 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6178 } 6179 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6180 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6181 ierr = PetscFree(reqs);CHKERRQ(ierr); 6182 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6183 #if defined(PETSC_USE_DEBUG) 6184 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6185 #endif 6186 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6187 } else if (oldranks) { 6188 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6189 } else { 6190 ranks_send_to_idx[0] = idxs[0]; 6191 } 6192 } 6193 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6194 /* clean up */ 6195 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6196 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6197 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6198 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6199 } 6200 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6201 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6202 6203 /* assemble parallel IS for sends */ 6204 i = 1; 6205 if (!color) i=0; 6206 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6207 PetscFunctionReturn(0); 6208 } 6209 6210 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6211 6212 #undef __FUNCT__ 6213 #define __FUNCT__ "MatISSubassemble" 6214 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6215 { 6216 Mat local_mat; 6217 IS is_sends_internal; 6218 PetscInt rows,cols,new_local_rows; 6219 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6220 PetscBool ismatis,isdense,newisdense,destroy_mat; 6221 ISLocalToGlobalMapping l2gmap; 6222 PetscInt* l2gmap_indices; 6223 const PetscInt* is_indices; 6224 MatType new_local_type; 6225 /* buffers */ 6226 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6227 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6228 PetscInt *recv_buffer_idxs_local; 6229 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6230 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6231 /* MPI */ 6232 MPI_Comm comm,comm_n; 6233 PetscSubcomm subcomm; 6234 PetscMPIInt n_sends,n_recvs,commsize; 6235 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6236 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6237 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6238 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6239 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6240 PetscErrorCode ierr; 6241 6242 PetscFunctionBegin; 6243 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6244 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6245 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6246 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6247 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6248 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6249 PetscValidLogicalCollectiveBool(mat,reuse,6); 6250 PetscValidLogicalCollectiveInt(mat,nis,8); 6251 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6252 if (nvecs) { 6253 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6254 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6255 } 6256 /* further checks */ 6257 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6258 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6259 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6260 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6261 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6262 if (reuse && *mat_n) { 6263 PetscInt mrows,mcols,mnrows,mncols; 6264 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6265 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6266 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6267 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6268 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6269 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6270 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6271 } 6272 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6273 PetscValidLogicalCollectiveInt(mat,bs,0); 6274 6275 /* prepare IS for sending if not provided */ 6276 if (!is_sends) { 6277 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6278 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6279 } else { 6280 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6281 is_sends_internal = is_sends; 6282 } 6283 6284 /* get comm */ 6285 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6286 6287 /* compute number of sends */ 6288 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6289 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6290 6291 /* compute number of receives */ 6292 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6293 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6294 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6295 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6296 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6297 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6298 ierr = PetscFree(iflags);CHKERRQ(ierr); 6299 6300 /* restrict comm if requested */ 6301 subcomm = 0; 6302 destroy_mat = PETSC_FALSE; 6303 if (restrict_comm) { 6304 PetscMPIInt color,subcommsize; 6305 6306 color = 0; 6307 if (restrict_full) { 6308 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6309 } else { 6310 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6311 } 6312 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6313 subcommsize = commsize - subcommsize; 6314 /* check if reuse has been requested */ 6315 if (reuse) { 6316 if (*mat_n) { 6317 PetscMPIInt subcommsize2; 6318 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6319 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6320 comm_n = PetscObjectComm((PetscObject)*mat_n); 6321 } else { 6322 comm_n = PETSC_COMM_SELF; 6323 } 6324 } else { /* MAT_INITIAL_MATRIX */ 6325 PetscMPIInt rank; 6326 6327 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6328 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6329 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6330 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6331 comm_n = PetscSubcommChild(subcomm); 6332 } 6333 /* flag to destroy *mat_n if not significative */ 6334 if (color) destroy_mat = PETSC_TRUE; 6335 } else { 6336 comm_n = comm; 6337 } 6338 6339 /* prepare send/receive buffers */ 6340 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6341 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6342 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6343 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6344 if (nis) { 6345 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6346 } 6347 6348 /* Get data from local matrices */ 6349 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6350 /* TODO: See below some guidelines on how to prepare the local buffers */ 6351 /* 6352 send_buffer_vals should contain the raw values of the local matrix 6353 send_buffer_idxs should contain: 6354 - MatType_PRIVATE type 6355 - PetscInt size_of_l2gmap 6356 - PetscInt global_row_indices[size_of_l2gmap] 6357 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6358 */ 6359 else { 6360 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6361 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6362 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6363 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6364 send_buffer_idxs[1] = i; 6365 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6366 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6367 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6368 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6369 for (i=0;i<n_sends;i++) { 6370 ilengths_vals[is_indices[i]] = len*len; 6371 ilengths_idxs[is_indices[i]] = len+2; 6372 } 6373 } 6374 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6375 /* additional is (if any) */ 6376 if (nis) { 6377 PetscMPIInt psum; 6378 PetscInt j; 6379 for (j=0,psum=0;j<nis;j++) { 6380 PetscInt plen; 6381 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6382 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6383 psum += len+1; /* indices + lenght */ 6384 } 6385 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6386 for (j=0,psum=0;j<nis;j++) { 6387 PetscInt plen; 6388 const PetscInt *is_array_idxs; 6389 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6390 send_buffer_idxs_is[psum] = plen; 6391 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6392 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6393 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6394 psum += plen+1; /* indices + lenght */ 6395 } 6396 for (i=0;i<n_sends;i++) { 6397 ilengths_idxs_is[is_indices[i]] = psum; 6398 } 6399 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6400 } 6401 6402 buf_size_idxs = 0; 6403 buf_size_vals = 0; 6404 buf_size_idxs_is = 0; 6405 buf_size_vecs = 0; 6406 for (i=0;i<n_recvs;i++) { 6407 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6408 buf_size_vals += (PetscInt)olengths_vals[i]; 6409 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6410 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6411 } 6412 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6413 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6414 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6415 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6416 6417 /* get new tags for clean communications */ 6418 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6419 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6420 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6421 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6422 6423 /* allocate for requests */ 6424 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6425 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6426 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6427 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6428 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6429 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6430 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6431 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6432 6433 /* communications */ 6434 ptr_idxs = recv_buffer_idxs; 6435 ptr_vals = recv_buffer_vals; 6436 ptr_idxs_is = recv_buffer_idxs_is; 6437 ptr_vecs = recv_buffer_vecs; 6438 for (i=0;i<n_recvs;i++) { 6439 source_dest = onodes[i]; 6440 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6441 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6442 ptr_idxs += olengths_idxs[i]; 6443 ptr_vals += olengths_vals[i]; 6444 if (nis) { 6445 source_dest = onodes_is[i]; 6446 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6447 ptr_idxs_is += olengths_idxs_is[i]; 6448 } 6449 if (nvecs) { 6450 source_dest = onodes[i]; 6451 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6452 ptr_vecs += olengths_idxs[i]-2; 6453 } 6454 } 6455 for (i=0;i<n_sends;i++) { 6456 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6457 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6458 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6459 if (nis) { 6460 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6461 } 6462 if (nvecs) { 6463 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6464 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6465 } 6466 } 6467 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6468 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6469 6470 /* assemble new l2g map */ 6471 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6472 ptr_idxs = recv_buffer_idxs; 6473 new_local_rows = 0; 6474 for (i=0;i<n_recvs;i++) { 6475 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6476 ptr_idxs += olengths_idxs[i]; 6477 } 6478 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6479 ptr_idxs = recv_buffer_idxs; 6480 new_local_rows = 0; 6481 for (i=0;i<n_recvs;i++) { 6482 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6483 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6484 ptr_idxs += olengths_idxs[i]; 6485 } 6486 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6487 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6488 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6489 6490 /* infer new local matrix type from received local matrices type */ 6491 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6492 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6493 if (n_recvs) { 6494 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6495 ptr_idxs = recv_buffer_idxs; 6496 for (i=0;i<n_recvs;i++) { 6497 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6498 new_local_type_private = MATAIJ_PRIVATE; 6499 break; 6500 } 6501 ptr_idxs += olengths_idxs[i]; 6502 } 6503 switch (new_local_type_private) { 6504 case MATDENSE_PRIVATE: 6505 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6506 new_local_type = MATSEQAIJ; 6507 bs = 1; 6508 } else { /* if I receive only 1 dense matrix */ 6509 new_local_type = MATSEQDENSE; 6510 bs = 1; 6511 } 6512 break; 6513 case MATAIJ_PRIVATE: 6514 new_local_type = MATSEQAIJ; 6515 bs = 1; 6516 break; 6517 case MATBAIJ_PRIVATE: 6518 new_local_type = MATSEQBAIJ; 6519 break; 6520 case MATSBAIJ_PRIVATE: 6521 new_local_type = MATSEQSBAIJ; 6522 break; 6523 default: 6524 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6525 break; 6526 } 6527 } else { /* by default, new_local_type is seqdense */ 6528 new_local_type = MATSEQDENSE; 6529 bs = 1; 6530 } 6531 6532 /* create MATIS object if needed */ 6533 if (!reuse) { 6534 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6535 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6536 } else { 6537 /* it also destroys the local matrices */ 6538 if (*mat_n) { 6539 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6540 } else { /* this is a fake object */ 6541 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6542 } 6543 } 6544 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6545 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6546 6547 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6548 6549 /* Global to local map of received indices */ 6550 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6551 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6552 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6553 6554 /* restore attributes -> type of incoming data and its size */ 6555 buf_size_idxs = 0; 6556 for (i=0;i<n_recvs;i++) { 6557 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6558 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6559 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6560 } 6561 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6562 6563 /* set preallocation */ 6564 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6565 if (!newisdense) { 6566 PetscInt *new_local_nnz=0; 6567 6568 ptr_idxs = recv_buffer_idxs_local; 6569 if (n_recvs) { 6570 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6571 } 6572 for (i=0;i<n_recvs;i++) { 6573 PetscInt j; 6574 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6575 for (j=0;j<*(ptr_idxs+1);j++) { 6576 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6577 } 6578 } else { 6579 /* TODO */ 6580 } 6581 ptr_idxs += olengths_idxs[i]; 6582 } 6583 if (new_local_nnz) { 6584 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6585 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6586 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6587 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6588 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6589 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6590 } else { 6591 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6592 } 6593 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6594 } else { 6595 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6596 } 6597 6598 /* set values */ 6599 ptr_vals = recv_buffer_vals; 6600 ptr_idxs = recv_buffer_idxs_local; 6601 for (i=0;i<n_recvs;i++) { 6602 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6603 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6604 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6605 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6606 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6607 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6608 } else { 6609 /* TODO */ 6610 } 6611 ptr_idxs += olengths_idxs[i]; 6612 ptr_vals += olengths_vals[i]; 6613 } 6614 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6615 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6616 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6617 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6618 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6619 6620 #if 0 6621 if (!restrict_comm) { /* check */ 6622 Vec lvec,rvec; 6623 PetscReal infty_error; 6624 6625 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6626 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6627 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6628 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6629 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6630 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6631 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6632 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6633 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6634 } 6635 #endif 6636 6637 /* assemble new additional is (if any) */ 6638 if (nis) { 6639 PetscInt **temp_idxs,*count_is,j,psum; 6640 6641 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6642 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6643 ptr_idxs = recv_buffer_idxs_is; 6644 psum = 0; 6645 for (i=0;i<n_recvs;i++) { 6646 for (j=0;j<nis;j++) { 6647 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6648 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6649 psum += plen; 6650 ptr_idxs += plen+1; /* shift pointer to received data */ 6651 } 6652 } 6653 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6654 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6655 for (i=1;i<nis;i++) { 6656 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6657 } 6658 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6659 ptr_idxs = recv_buffer_idxs_is; 6660 for (i=0;i<n_recvs;i++) { 6661 for (j=0;j<nis;j++) { 6662 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6663 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6664 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6665 ptr_idxs += plen+1; /* shift pointer to received data */ 6666 } 6667 } 6668 for (i=0;i<nis;i++) { 6669 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6670 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6671 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6672 } 6673 ierr = PetscFree(count_is);CHKERRQ(ierr); 6674 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6675 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6676 } 6677 /* free workspace */ 6678 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6679 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6680 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6681 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6682 if (isdense) { 6683 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6684 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6685 } else { 6686 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6687 } 6688 if (nis) { 6689 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6690 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6691 } 6692 6693 if (nvecs) { 6694 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6695 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6696 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6697 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6698 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6699 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6700 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6701 /* set values */ 6702 ptr_vals = recv_buffer_vecs; 6703 ptr_idxs = recv_buffer_idxs_local; 6704 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6705 for (i=0;i<n_recvs;i++) { 6706 PetscInt j; 6707 for (j=0;j<*(ptr_idxs+1);j++) { 6708 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6709 } 6710 ptr_idxs += olengths_idxs[i]; 6711 ptr_vals += olengths_idxs[i]-2; 6712 } 6713 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6714 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6715 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6716 } 6717 6718 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6719 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6720 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6721 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6722 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6723 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6724 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6725 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6726 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6727 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6728 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6729 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6730 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6731 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6732 ierr = PetscFree(onodes);CHKERRQ(ierr); 6733 if (nis) { 6734 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6735 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6736 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6737 } 6738 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6739 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6740 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6741 for (i=0;i<nis;i++) { 6742 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6743 } 6744 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6745 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6746 } 6747 *mat_n = NULL; 6748 } 6749 PetscFunctionReturn(0); 6750 } 6751 6752 /* temporary hack into ksp private data structure */ 6753 #include <petsc/private/kspimpl.h> 6754 6755 #undef __FUNCT__ 6756 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6757 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6758 { 6759 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6760 PC_IS *pcis = (PC_IS*)pc->data; 6761 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6762 Mat coarsedivudotp = NULL; 6763 MatNullSpace CoarseNullSpace = NULL; 6764 ISLocalToGlobalMapping coarse_islg; 6765 IS coarse_is,*isarray; 6766 PetscInt i,im_active=-1,active_procs=-1; 6767 PetscInt nis,nisdofs,nisneu,nisvert; 6768 PC pc_temp; 6769 PCType coarse_pc_type; 6770 KSPType coarse_ksp_type; 6771 PetscBool multilevel_requested,multilevel_allowed; 6772 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6773 Mat t_coarse_mat_is; 6774 PetscInt ncoarse; 6775 PetscBool compute_vecs = PETSC_FALSE; 6776 PetscScalar *array; 6777 MatReuse coarse_mat_reuse; 6778 PetscBool restr, full_restr, have_void; 6779 PetscErrorCode ierr; 6780 6781 PetscFunctionBegin; 6782 /* Assign global numbering to coarse dofs */ 6783 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6784 PetscInt ocoarse_size; 6785 compute_vecs = PETSC_TRUE; 6786 ocoarse_size = pcbddc->coarse_size; 6787 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6788 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6789 /* see if we can avoid some work */ 6790 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6791 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6792 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6793 PC pc; 6794 PetscBool isbddc; 6795 6796 /* temporary workaround since PCBDDC does not have a reset method so far */ 6797 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6798 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6799 if (isbddc) { 6800 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6801 } else { 6802 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6803 } 6804 coarse_reuse = PETSC_FALSE; 6805 } else { /* we can safely reuse already computed coarse matrix */ 6806 coarse_reuse = PETSC_TRUE; 6807 } 6808 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6809 coarse_reuse = PETSC_FALSE; 6810 } 6811 /* reset any subassembling information */ 6812 if (!coarse_reuse || pcbddc->recompute_topography) { 6813 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6814 } 6815 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6816 coarse_reuse = PETSC_TRUE; 6817 } 6818 /* assemble coarse matrix */ 6819 if (coarse_reuse && pcbddc->coarse_ksp) { 6820 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6821 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6822 coarse_mat_reuse = MAT_REUSE_MATRIX; 6823 } else { 6824 coarse_mat = NULL; 6825 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6826 } 6827 6828 /* creates temporary l2gmap and IS for coarse indexes */ 6829 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6830 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6831 6832 /* creates temporary MATIS object for coarse matrix */ 6833 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6834 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6835 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6836 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6837 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6838 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6839 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6840 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6841 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6842 6843 /* count "active" (i.e. with positive local size) and "void" processes */ 6844 im_active = !!(pcis->n); 6845 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6846 6847 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6848 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6849 /* full_restr : just use the receivers from the subassembling pattern */ 6850 coarse_mat_is = NULL; 6851 multilevel_allowed = PETSC_FALSE; 6852 multilevel_requested = PETSC_FALSE; 6853 full_restr = PETSC_TRUE; 6854 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6855 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6856 if (multilevel_requested) { 6857 ncoarse = active_procs/pcbddc->coarsening_ratio; 6858 restr = PETSC_FALSE; 6859 full_restr = PETSC_FALSE; 6860 } else { 6861 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6862 restr = PETSC_TRUE; 6863 full_restr = PETSC_TRUE; 6864 } 6865 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6866 ncoarse = PetscMax(1,ncoarse); 6867 if (!pcbddc->coarse_subassembling) { 6868 if (pcbddc->coarsening_ratio > 1) { 6869 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6870 } else { 6871 PetscMPIInt size,rank; 6872 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6873 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6874 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6875 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6876 } 6877 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6878 PetscInt psum; 6879 PetscMPIInt size; 6880 if (pcbddc->coarse_ksp) psum = 1; 6881 else psum = 0; 6882 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6883 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6884 if (ncoarse < size) have_void = PETSC_TRUE; 6885 } 6886 /* determine if we can go multilevel */ 6887 if (multilevel_requested) { 6888 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6889 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6890 } 6891 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6892 6893 /* dump subassembling pattern */ 6894 if (pcbddc->dbg_flag && multilevel_allowed) { 6895 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6896 } 6897 6898 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6899 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6900 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6901 const PetscInt *idxs; 6902 ISLocalToGlobalMapping tmap; 6903 6904 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6905 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6906 /* allocate space for temporary storage */ 6907 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6908 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6909 /* allocate for IS array */ 6910 nisdofs = pcbddc->n_ISForDofsLocal; 6911 nisneu = !!pcbddc->NeumannBoundariesLocal; 6912 nisvert = 0; /* nisvert is not used */ 6913 nis = nisdofs + nisneu + nisvert; 6914 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6915 /* dofs splitting */ 6916 for (i=0;i<nisdofs;i++) { 6917 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6918 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6919 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6920 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6921 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6922 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6923 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6924 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6925 } 6926 /* neumann boundaries */ 6927 if (pcbddc->NeumannBoundariesLocal) { 6928 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6929 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6930 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6931 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6932 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6933 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6934 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6935 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6936 } 6937 /* free memory */ 6938 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6939 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6940 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6941 } else { 6942 nis = 0; 6943 nisdofs = 0; 6944 nisneu = 0; 6945 nisvert = 0; 6946 isarray = NULL; 6947 } 6948 /* destroy no longer needed map */ 6949 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6950 6951 /* subassemble */ 6952 if (multilevel_allowed) { 6953 Vec vp[1]; 6954 PetscInt nvecs = 0; 6955 PetscBool reuse,reuser; 6956 6957 if (coarse_mat) reuse = PETSC_TRUE; 6958 else reuse = PETSC_FALSE; 6959 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6960 vp[0] = NULL; 6961 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6962 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6963 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6964 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6965 nvecs = 1; 6966 6967 if (pcbddc->divudotp) { 6968 Mat B,loc_divudotp; 6969 Vec v,p; 6970 IS dummy; 6971 PetscInt np; 6972 6973 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6974 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6975 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6976 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6977 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6978 ierr = VecSet(p,1.);CHKERRQ(ierr); 6979 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6980 ierr = VecDestroy(&p);CHKERRQ(ierr); 6981 ierr = MatDestroy(&B);CHKERRQ(ierr); 6982 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6983 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6984 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6985 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6986 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6987 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6988 ierr = VecDestroy(&v);CHKERRQ(ierr); 6989 } 6990 } 6991 if (reuser) { 6992 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6993 } else { 6994 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6995 } 6996 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6997 PetscScalar *arraym,*arrayv; 6998 PetscInt nl; 6999 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7000 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7001 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7002 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7003 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7004 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7005 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7006 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7007 } else { 7008 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7009 } 7010 } else { 7011 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 7012 } 7013 if (coarse_mat_is || coarse_mat) { 7014 PetscMPIInt size; 7015 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 7016 if (!multilevel_allowed) { 7017 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7018 } else { 7019 Mat A; 7020 7021 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7022 if (coarse_mat_is) { 7023 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7024 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7025 coarse_mat = coarse_mat_is; 7026 } 7027 /* be sure we don't have MatSeqDENSE as local mat */ 7028 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7029 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7030 } 7031 } 7032 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7033 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7034 7035 /* create local to global scatters for coarse problem */ 7036 if (compute_vecs) { 7037 PetscInt lrows; 7038 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7039 if (coarse_mat) { 7040 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7041 } else { 7042 lrows = 0; 7043 } 7044 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7045 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7046 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7047 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7048 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7049 } 7050 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7051 7052 /* set defaults for coarse KSP and PC */ 7053 if (multilevel_allowed) { 7054 coarse_ksp_type = KSPRICHARDSON; 7055 coarse_pc_type = PCBDDC; 7056 } else { 7057 coarse_ksp_type = KSPPREONLY; 7058 coarse_pc_type = PCREDUNDANT; 7059 } 7060 7061 /* print some info if requested */ 7062 if (pcbddc->dbg_flag) { 7063 if (!multilevel_allowed) { 7064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7065 if (multilevel_requested) { 7066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7067 } else if (pcbddc->max_levels) { 7068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7069 } 7070 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7071 } 7072 } 7073 7074 /* create the coarse KSP object only once with defaults */ 7075 if (coarse_mat) { 7076 PetscViewer dbg_viewer = NULL; 7077 if (pcbddc->dbg_flag) { 7078 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7079 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7080 } 7081 if (!pcbddc->coarse_ksp) { 7082 char prefix[256],str_level[16]; 7083 size_t len; 7084 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7085 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7086 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7087 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7088 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7089 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7090 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7091 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7092 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7093 /* prefix */ 7094 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7095 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7096 if (!pcbddc->current_level) { 7097 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7098 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7099 } else { 7100 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7101 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7102 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7103 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7104 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7105 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7106 } 7107 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7108 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7109 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7110 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7111 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7112 /* allow user customization */ 7113 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7114 } 7115 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7116 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7117 if (nisdofs) { 7118 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7119 for (i=0;i<nisdofs;i++) { 7120 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7121 } 7122 } 7123 if (nisneu) { 7124 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7125 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7126 } 7127 if (nisvert) { 7128 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7129 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7130 } 7131 7132 /* get some info after set from options */ 7133 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7134 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7135 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7136 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7137 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7138 isbddc = PETSC_FALSE; 7139 } 7140 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7141 if (isredundant) { 7142 KSP inner_ksp; 7143 PC inner_pc; 7144 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7145 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7146 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7147 } 7148 7149 /* parameters which miss an API */ 7150 if (isbddc) { 7151 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7152 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7153 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7154 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7155 if (pcbddc_coarse->benign_saddle_point) { 7156 Mat coarsedivudotp_is; 7157 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7158 IS row,col; 7159 const PetscInt *gidxs; 7160 PetscInt n,st,M,N; 7161 7162 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7163 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7164 st = st-n; 7165 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7166 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7167 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7168 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7169 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7170 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7171 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7172 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7173 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7174 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7175 ierr = ISDestroy(&row);CHKERRQ(ierr); 7176 ierr = ISDestroy(&col);CHKERRQ(ierr); 7177 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7178 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7179 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7180 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7181 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7182 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7183 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7184 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7185 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7186 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7187 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7188 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7189 } 7190 } 7191 7192 /* propagate symmetry info of coarse matrix */ 7193 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7194 if (pc->pmat->symmetric_set) { 7195 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7196 } 7197 if (pc->pmat->hermitian_set) { 7198 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7199 } 7200 if (pc->pmat->spd_set) { 7201 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7202 } 7203 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7204 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7205 } 7206 /* set operators */ 7207 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7208 if (pcbddc->dbg_flag) { 7209 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7210 } 7211 } 7212 ierr = PetscFree(isarray);CHKERRQ(ierr); 7213 #if 0 7214 { 7215 PetscViewer viewer; 7216 char filename[256]; 7217 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7218 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7219 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7220 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7221 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7222 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7223 } 7224 #endif 7225 7226 if (pcbddc->coarse_ksp) { 7227 Vec crhs,csol; 7228 7229 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7230 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7231 if (!csol) { 7232 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7233 } 7234 if (!crhs) { 7235 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7236 } 7237 } 7238 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7239 7240 /* compute null space for coarse solver if the benign trick has been requested */ 7241 if (pcbddc->benign_null) { 7242 7243 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7244 for (i=0;i<pcbddc->benign_n;i++) { 7245 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7246 } 7247 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7248 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7249 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7250 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7251 if (coarse_mat) { 7252 Vec nullv; 7253 PetscScalar *array,*array2; 7254 PetscInt nl; 7255 7256 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7257 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7258 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7259 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7260 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7261 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7262 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7263 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7264 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7265 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7266 } 7267 } 7268 7269 if (pcbddc->coarse_ksp) { 7270 PetscBool ispreonly; 7271 7272 if (CoarseNullSpace) { 7273 PetscBool isnull; 7274 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7275 if (isnull) { 7276 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7277 } 7278 /* TODO: add local nullspaces (if any) */ 7279 } 7280 /* setup coarse ksp */ 7281 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7282 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7283 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7284 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7285 KSP check_ksp; 7286 KSPType check_ksp_type; 7287 PC check_pc; 7288 Vec check_vec,coarse_vec; 7289 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7290 PetscInt its; 7291 PetscBool compute_eigs; 7292 PetscReal *eigs_r,*eigs_c; 7293 PetscInt neigs; 7294 const char *prefix; 7295 7296 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7297 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7298 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7299 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7300 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7301 /* prevent from setup unneeded object */ 7302 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7303 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7304 if (ispreonly) { 7305 check_ksp_type = KSPPREONLY; 7306 compute_eigs = PETSC_FALSE; 7307 } else { 7308 check_ksp_type = KSPGMRES; 7309 compute_eigs = PETSC_TRUE; 7310 } 7311 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7312 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7313 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7314 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7315 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7316 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7317 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7318 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7319 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7320 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7321 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7322 /* create random vec */ 7323 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7324 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7325 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7326 /* solve coarse problem */ 7327 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7328 /* set eigenvalue estimation if preonly has not been requested */ 7329 if (compute_eigs) { 7330 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7331 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7332 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7333 if (neigs) { 7334 lambda_max = eigs_r[neigs-1]; 7335 lambda_min = eigs_r[0]; 7336 if (pcbddc->use_coarse_estimates) { 7337 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7338 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7339 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7340 } 7341 } 7342 } 7343 } 7344 7345 /* check coarse problem residual error */ 7346 if (pcbddc->dbg_flag) { 7347 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7348 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7349 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7350 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7351 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7352 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7353 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7354 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7355 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7356 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7357 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7358 if (CoarseNullSpace) { 7359 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7360 } 7361 if (compute_eigs) { 7362 PetscReal lambda_max_s,lambda_min_s; 7363 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7364 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7365 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7366 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7367 for (i=0;i<neigs;i++) { 7368 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7369 } 7370 } 7371 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7372 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7373 } 7374 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7375 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7376 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7377 if (compute_eigs) { 7378 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7379 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7380 } 7381 } 7382 } 7383 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7384 /* print additional info */ 7385 if (pcbddc->dbg_flag) { 7386 /* waits until all processes reaches this point */ 7387 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7388 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7389 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7390 } 7391 7392 /* free memory */ 7393 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7394 PetscFunctionReturn(0); 7395 } 7396 7397 #undef __FUNCT__ 7398 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7399 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7400 { 7401 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7402 PC_IS* pcis = (PC_IS*)pc->data; 7403 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7404 IS subset,subset_mult,subset_n; 7405 PetscInt local_size,coarse_size=0; 7406 PetscInt *local_primal_indices=NULL; 7407 const PetscInt *t_local_primal_indices; 7408 PetscErrorCode ierr; 7409 7410 PetscFunctionBegin; 7411 /* Compute global number of coarse dofs */ 7412 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7413 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7414 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7415 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7416 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7417 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7418 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7419 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7420 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7421 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7422 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7423 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7424 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7425 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7426 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7427 7428 /* check numbering */ 7429 if (pcbddc->dbg_flag) { 7430 PetscScalar coarsesum,*array,*array2; 7431 PetscInt i; 7432 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7433 7434 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7435 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7436 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7437 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7438 /* counter */ 7439 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7440 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7441 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7442 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7443 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7444 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7445 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7446 for (i=0;i<pcbddc->local_primal_size;i++) { 7447 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7448 } 7449 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7450 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7451 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7452 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7453 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7454 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7455 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7456 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7457 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7458 for (i=0;i<pcis->n;i++) { 7459 if (array[i] != 0.0 && array[i] != array2[i]) { 7460 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7461 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7462 set_error = PETSC_TRUE; 7463 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7464 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7465 } 7466 } 7467 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7468 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7469 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7470 for (i=0;i<pcis->n;i++) { 7471 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7472 } 7473 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7474 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7475 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7476 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7477 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7478 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7479 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7480 PetscInt *gidxs; 7481 7482 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7483 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7484 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7486 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7487 for (i=0;i<pcbddc->local_primal_size;i++) { 7488 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7489 } 7490 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7491 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7492 } 7493 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7494 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7495 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7496 } 7497 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7498 /* get back data */ 7499 *coarse_size_n = coarse_size; 7500 *local_primal_indices_n = local_primal_indices; 7501 PetscFunctionReturn(0); 7502 } 7503 7504 #undef __FUNCT__ 7505 #define __FUNCT__ "PCBDDCGlobalToLocal" 7506 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7507 { 7508 IS localis_t; 7509 PetscInt i,lsize,*idxs,n; 7510 PetscScalar *vals; 7511 PetscErrorCode ierr; 7512 7513 PetscFunctionBegin; 7514 /* get indices in local ordering exploiting local to global map */ 7515 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7516 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7517 for (i=0;i<lsize;i++) vals[i] = 1.0; 7518 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7519 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7520 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7521 if (idxs) { /* multilevel guard */ 7522 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7523 } 7524 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7525 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7526 ierr = PetscFree(vals);CHKERRQ(ierr); 7527 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7528 /* now compute set in local ordering */ 7529 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7530 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7531 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7532 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7533 for (i=0,lsize=0;i<n;i++) { 7534 if (PetscRealPart(vals[i]) > 0.5) { 7535 lsize++; 7536 } 7537 } 7538 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7539 for (i=0,lsize=0;i<n;i++) { 7540 if (PetscRealPart(vals[i]) > 0.5) { 7541 idxs[lsize++] = i; 7542 } 7543 } 7544 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7545 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7546 *localis = localis_t; 7547 PetscFunctionReturn(0); 7548 } 7549 7550 #undef __FUNCT__ 7551 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7552 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7553 { 7554 PC_IS *pcis=(PC_IS*)pc->data; 7555 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7556 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7557 Mat S_j; 7558 PetscInt *used_xadj,*used_adjncy; 7559 PetscBool free_used_adj; 7560 PetscErrorCode ierr; 7561 7562 PetscFunctionBegin; 7563 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7564 free_used_adj = PETSC_FALSE; 7565 if (pcbddc->sub_schurs_layers == -1) { 7566 used_xadj = NULL; 7567 used_adjncy = NULL; 7568 } else { 7569 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7570 used_xadj = pcbddc->mat_graph->xadj; 7571 used_adjncy = pcbddc->mat_graph->adjncy; 7572 } else if (pcbddc->computed_rowadj) { 7573 used_xadj = pcbddc->mat_graph->xadj; 7574 used_adjncy = pcbddc->mat_graph->adjncy; 7575 } else { 7576 PetscBool flg_row=PETSC_FALSE; 7577 const PetscInt *xadj,*adjncy; 7578 PetscInt nvtxs; 7579 7580 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7581 if (flg_row) { 7582 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7583 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7584 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7585 free_used_adj = PETSC_TRUE; 7586 } else { 7587 pcbddc->sub_schurs_layers = -1; 7588 used_xadj = NULL; 7589 used_adjncy = NULL; 7590 } 7591 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7592 } 7593 } 7594 7595 /* setup sub_schurs data */ 7596 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7597 if (!sub_schurs->schur_explicit) { 7598 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7599 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7600 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7601 } else { 7602 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7603 PetscBool isseqaij,need_change = PETSC_FALSE; 7604 PetscInt benign_n; 7605 Mat change = NULL; 7606 Vec scaling = NULL; 7607 IS change_primal = NULL; 7608 7609 if (!pcbddc->use_vertices && reuse_solvers) { 7610 PetscInt n_vertices; 7611 7612 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7613 reuse_solvers = (PetscBool)!n_vertices; 7614 } 7615 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7616 if (!isseqaij) { 7617 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7618 if (matis->A == pcbddc->local_mat) { 7619 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7620 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7621 } else { 7622 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7623 } 7624 } 7625 if (!pcbddc->benign_change_explicit) { 7626 benign_n = pcbddc->benign_n; 7627 } else { 7628 benign_n = 0; 7629 } 7630 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7631 We need a global reduction to avoid possible deadlocks. 7632 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7633 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7634 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7635 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7636 need_change = (PetscBool)(!need_change); 7637 } 7638 /* If the user defines additional constraints, we import them here. 7639 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7640 if (need_change) { 7641 PC_IS *pcisf; 7642 PC_BDDC *pcbddcf; 7643 PC pcf; 7644 7645 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7646 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7647 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7648 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7649 /* hacks */ 7650 pcisf = (PC_IS*)pcf->data; 7651 pcisf->is_B_local = pcis->is_B_local; 7652 pcisf->vec1_N = pcis->vec1_N; 7653 pcisf->BtoNmap = pcis->BtoNmap; 7654 pcisf->n = pcis->n; 7655 pcisf->n_B = pcis->n_B; 7656 pcbddcf = (PC_BDDC*)pcf->data; 7657 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7658 pcbddcf->mat_graph = pcbddc->mat_graph; 7659 pcbddcf->use_faces = PETSC_TRUE; 7660 pcbddcf->use_change_of_basis = PETSC_TRUE; 7661 pcbddcf->use_change_on_faces = PETSC_TRUE; 7662 pcbddcf->use_qr_single = PETSC_TRUE; 7663 pcbddcf->fake_change = PETSC_TRUE; 7664 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7665 /* store information on primal vertices and change of basis (in local numbering) */ 7666 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7667 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7668 change = pcbddcf->ConstraintMatrix; 7669 pcbddcf->ConstraintMatrix = NULL; 7670 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7671 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7672 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7673 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7674 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7675 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7676 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7677 pcf->ops->destroy = NULL; 7678 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7679 } 7680 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7681 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7682 ierr = MatDestroy(&change);CHKERRQ(ierr); 7683 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7684 } 7685 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7686 7687 /* free adjacency */ 7688 if (free_used_adj) { 7689 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7690 } 7691 PetscFunctionReturn(0); 7692 } 7693 7694 #undef __FUNCT__ 7695 #define __FUNCT__ "PCBDDCInitSubSchurs" 7696 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7697 { 7698 PC_IS *pcis=(PC_IS*)pc->data; 7699 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7700 PCBDDCGraph graph; 7701 PetscErrorCode ierr; 7702 7703 PetscFunctionBegin; 7704 /* attach interface graph for determining subsets */ 7705 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7706 IS verticesIS,verticescomm; 7707 PetscInt vsize,*idxs; 7708 7709 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7710 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7711 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7712 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7713 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7714 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7715 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7716 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7717 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7718 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7719 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7720 } else { 7721 graph = pcbddc->mat_graph; 7722 } 7723 /* print some info */ 7724 if (pcbddc->dbg_flag) { 7725 IS vertices; 7726 PetscInt nv,nedges,nfaces; 7727 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7728 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7729 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7730 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7731 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7732 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7733 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7734 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7735 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7736 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7737 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7738 } 7739 7740 /* sub_schurs init */ 7741 if (!pcbddc->sub_schurs) { 7742 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7743 } 7744 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7745 7746 /* free graph struct */ 7747 if (pcbddc->sub_schurs_rebuild) { 7748 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7749 } 7750 PetscFunctionReturn(0); 7751 } 7752 7753 #undef __FUNCT__ 7754 #define __FUNCT__ "PCBDDCCheckOperator" 7755 PetscErrorCode PCBDDCCheckOperator(PC pc) 7756 { 7757 PC_IS *pcis=(PC_IS*)pc->data; 7758 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7759 PetscErrorCode ierr; 7760 7761 PetscFunctionBegin; 7762 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7763 IS zerodiag = NULL; 7764 Mat S_j,B0_B=NULL; 7765 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7766 PetscScalar *p0_check,*array,*array2; 7767 PetscReal norm; 7768 PetscInt i; 7769 7770 /* B0 and B0_B */ 7771 if (zerodiag) { 7772 IS dummy; 7773 7774 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7775 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7776 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7777 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7778 } 7779 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7780 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7781 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7782 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7783 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7784 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7785 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7786 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7787 /* S_j */ 7788 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7789 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7790 7791 /* mimic vector in \widetilde{W}_\Gamma */ 7792 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7793 /* continuous in primal space */ 7794 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7795 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7796 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7797 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7798 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7799 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7800 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7801 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7802 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7803 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7804 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7805 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7806 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7807 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7808 7809 /* assemble rhs for coarse problem */ 7810 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7811 /* local with Schur */ 7812 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7813 if (zerodiag) { 7814 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7815 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7816 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7817 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7818 } 7819 /* sum on primal nodes the local contributions */ 7820 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7821 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7822 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7823 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7824 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7825 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7826 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7827 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7828 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7829 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7830 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7831 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7832 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7833 /* scale primal nodes (BDDC sums contibutions) */ 7834 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7835 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7836 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7837 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7838 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7839 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7840 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7841 /* global: \widetilde{B0}_B w_\Gamma */ 7842 if (zerodiag) { 7843 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7844 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7845 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7846 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7847 } 7848 /* BDDC */ 7849 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7850 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7851 7852 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7853 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7854 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7855 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7856 for (i=0;i<pcbddc->benign_n;i++) { 7857 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7858 } 7859 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7860 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7861 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7862 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7863 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7864 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7865 } 7866 PetscFunctionReturn(0); 7867 } 7868