1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 if (!esize) PetscFunctionReturn(0); 84 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 85 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 86 87 /* gradients */ 88 ptr = work + 5*esize; 89 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 90 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 91 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 92 ierr = MatDestroy(&GE);CHKERRQ(ierr); 93 94 /* constants */ 95 ptr += rsize*csize; 96 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 97 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 99 ierr = MatDestroy(&GE);CHKERRQ(ierr); 100 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 101 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PCBDDCNedelecSupport" 107 PetscErrorCode PCBDDCNedelecSupport(PC pc) 108 { 109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 110 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 111 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 112 MatNullSpace nnsp; 113 Vec tvec,*quads; 114 PetscSF sfv; 115 ISLocalToGlobalMapping el2g,vl2g,fl2g; 116 MPI_Comm comm; 117 IS lned,primals,allprimals,nedfieldlocal; 118 IS *eedges,*extrows,*extcols,*alleedges; 119 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee; 120 PetscScalar *vals,*work; 121 PetscReal *rwork; 122 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 123 PetscInt ne,nv,Lv,order,n,field; 124 PetscInt n_neigh,*neigh,*n_shared,**shared; 125 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscBool print,eerr,done,lrc[2],conforming; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* test variable order code and print debug info TODO: to be removed */ 133 print = PETSC_FALSE; 134 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 135 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 136 137 /* Return to caller if there are no edges in the decomposition */ 138 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 139 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 140 ierr = ISLocalToGlobalMappingGetSize(el2g,&n);CHKERRQ(ierr); 141 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 142 lrc[0] = PETSC_FALSE; 143 for (i=0;i<n;i++) { 144 if (PetscRealPart(vals[i]) > 2.) { 145 lrc[0] = PETSC_TRUE; 146 break; 147 } 148 } 149 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 150 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 151 if (!lrc[1]) PetscFunctionReturn(0); 152 153 /* Get discrete gradient 154 If it is defined for a subset of dofs, assumes G is given in global ordering for all the dofs */ 155 G = pcbddc->discretegradient; 156 order = pcbddc->nedorder; 157 conforming = pcbddc->conforming; 158 field = pcbddc->nedfield; 159 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 160 if (pcbddc->n_ISForDofsLocal && field > -1) { 161 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 162 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 163 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 164 } else { 165 PetscBool testnedfield = PETSC_FALSE; 166 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 167 if (!testnedfield) { 168 ne = n; 169 nedfieldlocal = NULL; 170 } else { 171 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 172 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 173 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 174 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 175 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 176 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 177 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 178 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 179 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 180 for (i=0,cum=0;i<n;i++) { 181 if (matis->sf_leafdata[i] > 1) { 182 matis->sf_leafdata[cum++] = i; 183 } 184 } 185 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 186 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 187 } 188 } 189 190 if (nedfieldlocal) { /* merge with previous code when testing is done */ 191 IS is; 192 193 /* need to map from the local Nedelec field to local numbering */ 194 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 195 /* need to map from the local Nedelec field to global numbering */ 196 ierr = ISLocalToGlobalMappingApplyIS(el2g,nedfieldlocal,&is);CHKERRQ(ierr); 197 ierr = ISLocalToGlobalMappingCreateIS(is,&el2g);CHKERRQ(ierr); 198 ierr = ISDestroy(&is);CHKERRQ(ierr); 199 } else { 200 ierr = PetscObjectReference((PetscObject)el2g);CHKERRQ(ierr); 201 fl2g = NULL; 202 } 203 204 /* Sanity checks */ 205 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 206 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 207 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 208 209 /* Drop connections for interior edges (this modifies G) */ 210 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 211 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 212 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 213 if (nedfieldlocal) { 214 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 215 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 216 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 217 } else { 218 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 219 } 220 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 221 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 222 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 223 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 224 if (matis->sf_rootdata[i] < 2) { 225 matis->sf_rootdata[cum++] = i + rst; 226 } 227 } 228 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 229 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 230 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 231 232 /* Extract subdomain relevant rows of G */ 233 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 234 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 235 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 236 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 237 ierr = ISDestroy(&lned);CHKERRQ(ierr); 238 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 239 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 240 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 241 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 242 if (print) { 243 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 244 ierr = MatView(lG,NULL);CHKERRQ(ierr); 245 } 246 247 /* SF for nodal communications */ 248 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 249 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 250 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 251 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 252 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 253 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 254 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 257 258 /* Destroy temporary G created in MATIS format */ 259 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 260 261 /* Save lG */ 262 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 263 264 /* Analyze the edge-nodes connections (duplicate lG) */ 265 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 266 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 267 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 268 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 269 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 270 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 271 /* need to import the boundary specification to ensure the 272 proper detection of coarse edges' endpoints */ 273 if (pcbddc->DirichletBoundariesLocal) { 274 IS is; 275 276 if (fl2g) { 277 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 278 } else { 279 is = pcbddc->DirichletBoundariesLocal; 280 } 281 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 282 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 283 for (i=0;i<cum;i++) { 284 if (idxs[i] >= 0) { 285 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 286 } 287 } 288 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 289 if (fl2g) { 290 ierr = ISDestroy(&is);CHKERRQ(ierr); 291 } 292 } 293 if (pcbddc->NeumannBoundariesLocal) { 294 IS is; 295 296 if (fl2g) { 297 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 298 } else { 299 is = pcbddc->NeumannBoundariesLocal; 300 } 301 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 302 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 303 for (i=0;i<cum;i++) { 304 if (idxs[i] >= 0) { 305 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 306 } 307 } 308 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 309 if (fl2g) { 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } 312 } 313 314 /* need to remove coarse faces' dofs to ensure the 315 proper detection of coarse edges' endpoints */ 316 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 317 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 318 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 319 for (i=1;i<n_neigh;i++) 320 for (j=0;j<n_shared[i];j++) 321 marks[shared[i][j]]++; 322 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) { 324 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 325 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 326 } 327 } 328 329 if (!conforming) { 330 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 331 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 332 } 333 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 334 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 335 cum = 0; 336 for (i=0;i<ne;i++) { 337 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 338 if (!PetscBTLookup(btee,i)) { 339 marks[cum++] = i; 340 continue; 341 } 342 /* set badly connected edge dofs as primal */ 343 if (!conforming) { 344 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 345 marks[cum++] = i; 346 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 347 for (j=ii[i];j<ii[i+1];j++) { 348 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 349 } 350 } else { 351 /* every edge dofs should be connected trough a certain number of nodal dofs 352 to other edge dofs belonging to coarse edges 353 - at most 2 endpoints 354 - order-1 interior nodal dofs 355 - no undefined nodal dofs (nconn < order) 356 */ 357 PetscInt ends = 0,ints = 0, undef = 0; 358 for (j=ii[i];j<ii[i+1];j++) { 359 PetscInt v = jj[j],k; 360 PetscInt nconn = iit[v+1]-iit[v]; 361 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 362 if (nconn > order) ends++; 363 else if (nconn == order) ints++; 364 else undef++; 365 } 366 if (undef || ends > 2 || ints != order -1) { 367 marks[cum++] = i; 368 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 369 for (j=ii[i];j<ii[i+1];j++) { 370 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 371 } 372 } 373 } 374 } 375 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 376 if (!order && ii[i+1] != ii[i]) { 377 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 378 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 379 } 380 } 381 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 382 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 383 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 384 if (!conforming) { 385 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 386 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 387 } 388 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 389 /* identify splitpoints and corner candidates: TODO variable order */ 390 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 391 if (print) { 392 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 393 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 394 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 395 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 396 } 397 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 398 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 399 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 400 for (i=0;i<nv;i++) { 401 PetscInt ord = order, test = ii[i+1]-ii[i]; 402 if (!order) { 403 PetscReal vorder = 0.; 404 405 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 406 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 407 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 408 ord = 1; 409 } 410 #if defined(PETSC_USE_DEBUG) 411 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 412 #endif 413 if (test >= 3*ord) { /* splitpoints */ 414 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 415 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 416 } else if (test == ord) { 417 if (order == 1) { 418 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 419 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 420 } else { 421 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 422 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 423 } 424 } 425 } 426 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 427 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 428 429 /* Get the local G^T explicitly */ 430 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 431 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 432 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 433 434 /* Mark interior nodal dofs */ 435 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 436 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 437 for (i=1;i<n_neigh;i++) { 438 for (j=0;j<n_shared[i];j++) { 439 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 440 } 441 } 442 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 443 444 /* communicate corners and splitpoints */ 445 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 446 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 447 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 448 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 449 450 if (print) { 451 IS tbz; 452 453 cum = 0; 454 for (i=0;i<nv;i++) 455 if (sfvleaves[i]) 456 vmarks[cum++] = i; 457 458 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 459 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 460 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 461 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 462 } 463 464 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 465 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 466 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 467 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 468 469 /* Zero rows of lGt corresponding to identified corners 470 and interior nodal dofs */ 471 cum = 0; 472 for (i=0;i<nv;i++) { 473 if (sfvleaves[i]) { 474 vmarks[cum++] = i; 475 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 476 } 477 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 478 } 479 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 480 if (print) { 481 IS tbz; 482 483 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 484 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 485 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 486 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 487 } 488 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 489 ierr = PetscFree(vmarks);CHKERRQ(ierr); 490 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 491 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 492 493 /* Recompute G */ 494 ierr = MatDestroy(&lG);CHKERRQ(ierr); 495 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 496 if (print) { 497 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 498 ierr = MatView(lG,NULL);CHKERRQ(ierr); 499 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 500 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 501 } 502 503 /* Get primal dofs (if any) */ 504 cum = 0; 505 for (i=0;i<ne;i++) { 506 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 507 } 508 if (fl2g) { 509 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 510 } 511 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 512 if (print) { 513 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 514 ierr = ISView(primals,NULL);CHKERRQ(ierr); 515 } 516 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 517 /* TODO: what if the user passed in some of them ? */ 518 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 519 ierr = ISDestroy(&primals);CHKERRQ(ierr); 520 521 /* Compute edge connectivity */ 522 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 523 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 524 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 525 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 526 if (fl2g) { 527 PetscBT btf; 528 PetscInt *iia,*jja,*iiu,*jju; 529 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 530 531 /* create CSR for all local dofs */ 532 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 533 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 534 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 535 iiu = pcbddc->mat_graph->xadj; 536 jju = pcbddc->mat_graph->adjncy; 537 } else if (pcbddc->use_local_adj) { 538 rest = PETSC_TRUE; 539 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 540 } else { 541 free = PETSC_TRUE; 542 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 543 iiu[0] = 0; 544 for (i=0;i<n;i++) { 545 iiu[i+1] = i+1; 546 jju[i] = -1; 547 } 548 } 549 550 /* import sizes of CSR */ 551 iia[0] = 0; 552 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 553 554 /* overwrite entries corresponding to the Nedelec field */ 555 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 556 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 557 for (i=0;i<ne;i++) { 558 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 559 iia[idxs[i]+1] = ii[i+1]-ii[i]; 560 } 561 562 /* iia in CSR */ 563 for (i=0;i<n;i++) iia[i+1] += iia[i]; 564 565 /* jja in CSR */ 566 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 567 for (i=0;i<n;i++) 568 if (!PetscBTLookup(btf,i)) 569 for (j=0;j<iiu[i+1]-iiu[i];j++) 570 jja[iia[i]+j] = jju[iiu[i]+j]; 571 572 /* map edge dofs connectivity */ 573 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 574 for (i=0;i<ne;i++) { 575 PetscInt e = idxs[i]; 576 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 577 } 578 579 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 580 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 581 if (rest) { 582 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 583 } 584 if (free) { 585 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 586 } 587 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 588 } else { 589 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 590 } 591 } 592 593 /* Analyze interface for edge dofs */ 594 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 595 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 596 597 /* Get coarse edges in the edge space */ 598 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 599 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 600 601 if (fl2g) { 602 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 603 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 604 for (i=0;i<nee;i++) { 605 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 606 } 607 } else { 608 eedges = alleedges; 609 primals = allprimals; 610 } 611 612 /* Mark fine edge dofs with their coarse edge id */ 613 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 614 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 615 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 616 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 617 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 618 if (print) { 619 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 620 ierr = ISView(primals,NULL);CHKERRQ(ierr); 621 } 622 623 maxsize = 0; 624 for (i=0;i<nee;i++) { 625 PetscInt size,mark = i+1; 626 627 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 628 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 629 for (j=0;j<size;j++) marks[idxs[j]] = mark; 630 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 631 maxsize = PetscMax(maxsize,size); 632 } 633 634 /* Find coarse edge endpoints */ 635 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 636 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 637 for (i=0;i<nee;i++) { 638 PetscInt mark = i+1,size; 639 640 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 641 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 642 if (print) { 643 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 644 ISView(eedges[i],NULL); 645 } 646 for (j=0;j<size;j++) { 647 PetscInt k, ee = idxs[j]; 648 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 649 for (k=ii[ee];k<ii[ee+1];k++) { 650 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 651 if (PetscBTLookup(btv,jj[k])) { 652 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 653 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 654 PetscInt k2; 655 PetscBool corner = PETSC_FALSE; 656 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 657 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 658 /* it's a corner if either is connected with an edge dof belonging to a different cc or 659 if the edge dof lie on the natural part of the boundary */ 660 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 661 corner = PETSC_TRUE; 662 break; 663 } 664 } 665 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 666 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 667 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 668 } else { 669 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 670 } 671 } 672 } 673 } 674 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 675 } 676 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 677 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 678 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 679 680 /* Reset marked primal dofs */ 681 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 682 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 683 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 684 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 685 686 /* Now use the initial lG */ 687 ierr = MatDestroy(&lG);CHKERRQ(ierr); 688 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 689 lG = lGinit; 690 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 691 692 /* Compute extended cols indices */ 693 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 694 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 695 i *= maxsize; 696 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 697 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 698 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 699 eerr = PETSC_FALSE; 700 for (i=0;i<nee;i++) { 701 PetscInt size; 702 703 cum = 0; 704 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 705 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 706 for (j=0;j<size;j++) { 707 PetscInt k,ee = idxs[j]; 708 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 709 } 710 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 711 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 712 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 713 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 714 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 715 /* it may happen that endpoints are not defined at this point 716 if it is the case, mark this edge for a second pass */ 717 if (cum != size -1) { 718 emarks[i] = 1; 719 if (print) { 720 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 721 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 722 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 723 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 724 } 725 eerr = PETSC_TRUE; 726 } 727 } 728 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 729 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 730 if (done) { 731 PetscInt *newprimals; 732 733 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 734 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 735 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 736 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 738 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 739 for (i=0;i<nee;i++) { 740 if (emarks[i]) { 741 PetscInt size,mark = i+1; 742 743 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 744 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 745 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 746 for (j=0;j<size;j++) { 747 PetscInt k,ee = idxs[j]; 748 for (k=ii[ee];k<ii[ee+1];k++) { 749 /* set all candidates located on the edge as corners */ 750 if (PetscBTLookup(btvcand,jj[k])) { 751 PetscInt k2,vv = jj[k]; 752 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 753 /* set all edge dofs connected to candidate as primals */ 754 for (k2=iit[vv];k2<iit[vv+1];k2++) { 755 if (marks[jjt[k2]] == mark) { 756 PetscInt k3,ee2 = jjt[k2]; 757 newprimals[cum++] = ee2; 758 /* finally set the new corners */ 759 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 760 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 761 } 762 } 763 } 764 } 765 } 766 } 767 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 768 } 769 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 770 } 771 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 772 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 773 if (fl2g) { 774 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 775 ierr = ISDestroy(&primals);CHKERRQ(ierr); 776 for (i=0;i<nee;i++) { 777 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 778 } 779 ierr = PetscFree(eedges);CHKERRQ(ierr); 780 } 781 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 782 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 783 ierr = PetscFree(newprimals);CHKERRQ(ierr); 784 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 785 ierr = ISDestroy(&primals);CHKERRQ(ierr); 786 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 787 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 788 if (fl2g) { 789 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 790 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 791 for (i=0;i<nee;i++) { 792 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 793 } 794 } else { 795 eedges = alleedges; 796 primals = allprimals; 797 } 798 799 /* Mark again */ 800 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 801 for (i=0;i<nee;i++) { 802 PetscInt size,mark = i+1; 803 804 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 805 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 806 for (j=0;j<size;j++) marks[idxs[j]] = mark; 807 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 808 } 809 if (print) { 810 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 811 ierr = ISView(primals,NULL);CHKERRQ(ierr); 812 } 813 814 /* Recompute extended cols */ 815 eerr = PETSC_FALSE; 816 for (i=0;i<nee;i++) { 817 PetscInt size; 818 819 cum = 0; 820 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 821 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 822 for (j=0;j<size;j++) { 823 PetscInt k,ee = idxs[j]; 824 for (k=ii[ee];k<ii[ee+1];k++) { 825 if (!PetscBTLookup(btv,jj[k])) { 826 extrow[cum++] = jj[k]; 827 } 828 } 829 } 830 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 831 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 832 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 833 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 834 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 835 if (cum != size -1) { 836 if (print) { 837 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 838 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 839 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 840 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 841 } 842 eerr = PETSC_TRUE; 843 } 844 } 845 } 846 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 847 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 848 ierr = PetscFree(emarks);CHKERRQ(ierr); 849 /* an error should not occur at this point */ 850 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 851 852 /* Check the number of endpoints */ 853 /* TODO: fix case for circular edge */ 854 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 855 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 856 for (i=0;i<nee;i++) { 857 PetscInt size, found = 0; 858 859 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 860 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 861 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 862 for (j=0;j<size;j++) { 863 PetscInt k,ee = idxs[j]; 864 for (k=ii[ee];k<ii[ee+1];k++) { 865 PetscInt vv = jj[k]; 866 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 867 found++; 868 } 869 } 870 } 871 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 872 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 873 } 874 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 875 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 876 877 #if defined(PETSC_USE_DEBUG) 878 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 879 not interfere with neighbouring coarse edges */ 880 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 881 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 for (i=0;i<nv;i++) { 883 PetscInt emax = 0,eemax = 0; 884 885 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 886 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 887 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 888 for (j=1;j<nee+1;j++) { 889 if (emax < emarks[j]) { 890 emax = emarks[j]; 891 eemax = j; 892 } 893 } 894 /* not relevant for edges */ 895 if (!eemax) continue; 896 897 for (j=ii[i];j<ii[i+1];j++) { 898 if (marks[jj[j]] && marks[jj[j]] != eemax) { 899 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 900 } 901 } 902 } 903 ierr = PetscFree(emarks);CHKERRQ(ierr); 904 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 905 #endif 906 907 /* Compute extended rows indices for edge blocks of the change of basis */ 908 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 909 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 910 extmem *= maxsize; 911 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 912 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 913 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 914 for (i=0;i<nv;i++) { 915 PetscInt mark = 0,size,start; 916 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 917 for (j=ii[i];j<ii[i+1];j++) 918 if (marks[jj[j]] && !mark) 919 mark = marks[jj[j]]; 920 921 /* not relevant */ 922 if (!mark) continue; 923 924 /* import extended row */ 925 mark--; 926 start = mark*extmem+extrowcum[mark]; 927 size = ii[i+1]-ii[i]; 928 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 929 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 930 extrowcum[mark] += size; 931 } 932 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 933 cum = 0; 934 for (i=0;i<nee;i++) { 935 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 936 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 937 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 938 cum = PetscMax(cum,size); 939 } 940 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 941 ierr = PetscFree(marks);CHKERRQ(ierr); 942 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 943 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 944 945 /* Workspace for lapack inner calls and VecSetValues */ 946 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 947 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 948 for (i=0;i<maxsize;i++) vals[i] = 1.; 949 950 /* Create vectors for quadrature rules */ 951 /* TODO preserve other quadratures */ 952 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 953 for (i=0;i<nquads;i++) { 954 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 955 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 956 } 957 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 958 959 /* Create change of basis matrix (preallocation can be improved) */ 960 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 961 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 962 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 963 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 964 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 965 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 966 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 967 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 968 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 969 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 970 971 /* Defaults to identity */ 972 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 973 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 974 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 975 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 976 977 for (i=0;i<nee;i++) { 978 Mat Gins = NULL, GKins = NULL; 979 980 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 981 if (Gins && GKins) { 982 PetscScalar *data; 983 const PetscInt *rows,*cols; 984 PetscInt nrh,nch,nrc,ncc; 985 986 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 987 /* H1 */ 988 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 989 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 990 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 991 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 992 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 993 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 994 /* complement */ 995 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 996 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 997 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 998 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 999 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1000 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1001 /* Gins kernel quadratures */ 1002 for (j=0;j<ncc;j++) { 1003 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1004 } 1005 /* H1 average */ 1006 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1007 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1008 } 1009 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1010 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1011 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1012 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1013 } 1014 1015 /* Start assembling */ 1016 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1017 for (i=0;i<nquads;i++) { 1018 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1019 } 1020 1021 /* Free */ 1022 if (fl2g) { 1023 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1024 for (i=0;i<nee;i++) { 1025 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1026 } 1027 ierr = PetscFree(eedges);CHKERRQ(ierr); 1028 } 1029 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1030 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1031 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1032 ierr = PetscFree(extrow);CHKERRQ(ierr); 1033 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1034 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1035 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1036 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1037 ierr = PetscFree(vals);CHKERRQ(ierr); 1038 ierr = PetscFree(extrows);CHKERRQ(ierr); 1039 ierr = PetscFree(extcols);CHKERRQ(ierr); 1040 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1041 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1042 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1043 1044 /* Complete assembling */ 1045 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1046 for (i=0;i<nquads;i++) { 1047 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1048 } 1049 for (i=0;i<nquads;i++) { 1050 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1051 } 1052 ierr = PetscFree(quads);CHKERRQ(ierr); 1053 1054 /* set change of basis */ 1055 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1056 ierr = MatDestroy(&T);CHKERRQ(ierr); 1057 1058 /* set quadratures */ 1059 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1060 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1061 1062 PetscFunctionReturn(0); 1063 } 1064 1065 /* the near-null space of BDDC carries information on quadrature weights, 1066 and these can be collinear -> so cheat with MatNullSpaceCreate 1067 and create a suitable set of basis vectors first */ 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1070 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1071 { 1072 PetscErrorCode ierr; 1073 PetscInt i; 1074 1075 PetscFunctionBegin; 1076 for (i=0;i<nvecs;i++) { 1077 PetscInt first,last; 1078 1079 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1080 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1081 if (i>=first && i < last) { 1082 PetscScalar *data; 1083 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1084 if (!has_const) { 1085 data[i-first] = 1.; 1086 } else { 1087 data[2*i-first] = 1./PetscSqrtReal(2.); 1088 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1089 } 1090 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1091 } 1092 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1093 } 1094 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1095 for (i=0;i<nvecs;i++) { /* reset vectors */ 1096 PetscInt first,last; 1097 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1098 if (i>=first && i < last) { 1099 PetscScalar *data; 1100 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1101 if (!has_const) { 1102 data[i-first] = 0.; 1103 } else { 1104 data[2*i-first] = 0.; 1105 data[2*i-first+1] = 0.; 1106 } 1107 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1108 } 1109 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1116 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1117 { 1118 Mat loc_divudotp; 1119 Vec p,v,vins,quad_vec,*quad_vecs; 1120 ISLocalToGlobalMapping map; 1121 IS *faces,*edges; 1122 PetscScalar *vals; 1123 const PetscScalar *array; 1124 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1125 PetscMPIInt rank; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1130 if (graph->twodim) { 1131 lmaxneighs = 2; 1132 } else { 1133 lmaxneighs = 1; 1134 for (i=0;i<ne;i++) { 1135 const PetscInt *idxs; 1136 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1137 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1138 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1139 } 1140 lmaxneighs++; /* graph count does not include self */ 1141 } 1142 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1143 maxsize = 0; 1144 for (i=0;i<ne;i++) { 1145 PetscInt nn; 1146 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1147 maxsize = PetscMax(maxsize,nn); 1148 } 1149 for (i=0;i<nf;i++) { 1150 PetscInt nn; 1151 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1152 maxsize = PetscMax(maxsize,nn); 1153 } 1154 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1155 /* create vectors to hold quadrature weights */ 1156 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1157 if (!transpose) { 1158 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1159 } else { 1160 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1161 } 1162 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1163 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1164 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1165 for (i=0;i<maxneighs;i++) { 1166 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1167 } 1168 1169 /* compute local quad vec */ 1170 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1171 if (!transpose) { 1172 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1173 } else { 1174 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1175 } 1176 ierr = VecSet(p,1.);CHKERRQ(ierr); 1177 if (!transpose) { 1178 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1179 } else { 1180 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1181 } 1182 if (vl2l) { 1183 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1184 } else { 1185 vins = v; 1186 } 1187 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1188 ierr = VecDestroy(&p);CHKERRQ(ierr); 1189 1190 /* insert in global quadrature vecs */ 1191 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1192 for (i=0;i<nf;i++) { 1193 const PetscInt *idxs; 1194 PetscInt idx,nn,j; 1195 1196 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1197 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1198 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1199 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1200 idx = -(idx+1); 1201 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1202 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1203 } 1204 for (i=0;i<ne;i++) { 1205 const PetscInt *idxs; 1206 PetscInt idx,nn,j; 1207 1208 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1209 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1210 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1211 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1212 idx = -(idx+1); 1213 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1214 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1215 } 1216 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1217 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1218 if (vl2l) { 1219 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1220 } 1221 ierr = VecDestroy(&v);CHKERRQ(ierr); 1222 ierr = PetscFree(vals);CHKERRQ(ierr); 1223 1224 /* assemble near null space */ 1225 for (i=0;i<maxneighs;i++) { 1226 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1227 } 1228 for (i=0;i<maxneighs;i++) { 1229 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1230 } 1231 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1238 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1239 { 1240 PetscErrorCode ierr; 1241 Vec local,global; 1242 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1243 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1244 1245 PetscFunctionBegin; 1246 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1247 /* need to convert from global to local topology information and remove references to information in global ordering */ 1248 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1249 if (pcbddc->user_provided_isfordofs) { 1250 if (pcbddc->n_ISForDofs) { 1251 PetscInt i; 1252 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1253 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1254 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1255 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1256 } 1257 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1258 pcbddc->n_ISForDofs = 0; 1259 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1260 } 1261 } else { 1262 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1263 PetscInt i, n = matis->A->rmap->n; 1264 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1265 if (i > 1) { 1266 pcbddc->n_ISForDofsLocal = i; 1267 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1268 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1269 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1270 } 1271 } 1272 } 1273 } 1274 1275 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1276 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1277 } 1278 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1279 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1280 } 1281 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1282 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1283 } 1284 ierr = VecDestroy(&global);CHKERRQ(ierr); 1285 ierr = VecDestroy(&local);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1291 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1292 { 1293 PC_IS *pcis = (PC_IS*)(pc->data); 1294 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1295 PetscErrorCode ierr; 1296 1297 PetscFunctionBegin; 1298 if (!pcbddc->benign_have_null) { 1299 PetscFunctionReturn(0); 1300 } 1301 if (pcbddc->ChangeOfBasisMatrix) { 1302 Vec swap; 1303 1304 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1305 swap = pcbddc->work_change; 1306 pcbddc->work_change = r; 1307 r = swap; 1308 } 1309 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1310 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1311 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1312 ierr = VecSet(z,0.);CHKERRQ(ierr); 1313 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1314 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1315 if (pcbddc->ChangeOfBasisMatrix) { 1316 pcbddc->work_change = r; 1317 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1318 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1325 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1326 { 1327 PCBDDCBenignMatMult_ctx ctx; 1328 PetscErrorCode ierr; 1329 PetscBool apply_right,apply_left,reset_x; 1330 1331 PetscFunctionBegin; 1332 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1333 if (transpose) { 1334 apply_right = ctx->apply_left; 1335 apply_left = ctx->apply_right; 1336 } else { 1337 apply_right = ctx->apply_right; 1338 apply_left = ctx->apply_left; 1339 } 1340 reset_x = PETSC_FALSE; 1341 if (apply_right) { 1342 const PetscScalar *ax; 1343 PetscInt nl,i; 1344 1345 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1346 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1347 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1348 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1349 for (i=0;i<ctx->benign_n;i++) { 1350 PetscScalar sum,val; 1351 const PetscInt *idxs; 1352 PetscInt nz,j; 1353 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1354 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1355 sum = 0.; 1356 if (ctx->apply_p0) { 1357 val = ctx->work[idxs[nz-1]]; 1358 for (j=0;j<nz-1;j++) { 1359 sum += ctx->work[idxs[j]]; 1360 ctx->work[idxs[j]] += val; 1361 } 1362 } else { 1363 for (j=0;j<nz-1;j++) { 1364 sum += ctx->work[idxs[j]]; 1365 } 1366 } 1367 ctx->work[idxs[nz-1]] -= sum; 1368 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1369 } 1370 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1371 reset_x = PETSC_TRUE; 1372 } 1373 if (transpose) { 1374 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1375 } else { 1376 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1377 } 1378 if (reset_x) { 1379 ierr = VecResetArray(x);CHKERRQ(ierr); 1380 } 1381 if (apply_left) { 1382 PetscScalar *ay; 1383 PetscInt i; 1384 1385 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1386 for (i=0;i<ctx->benign_n;i++) { 1387 PetscScalar sum,val; 1388 const PetscInt *idxs; 1389 PetscInt nz,j; 1390 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1391 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1392 val = -ay[idxs[nz-1]]; 1393 if (ctx->apply_p0) { 1394 sum = 0.; 1395 for (j=0;j<nz-1;j++) { 1396 sum += ay[idxs[j]]; 1397 ay[idxs[j]] += val; 1398 } 1399 ay[idxs[nz-1]] += sum; 1400 } else { 1401 for (j=0;j<nz-1;j++) { 1402 ay[idxs[j]] += val; 1403 } 1404 ay[idxs[nz-1]] = 0.; 1405 } 1406 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1407 } 1408 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1415 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1416 { 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1426 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1427 { 1428 PetscErrorCode ierr; 1429 1430 PetscFunctionBegin; 1431 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "PCBDDCBenignShellMat" 1437 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1438 { 1439 PC_IS *pcis = (PC_IS*)pc->data; 1440 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1441 PCBDDCBenignMatMult_ctx ctx; 1442 PetscErrorCode ierr; 1443 1444 PetscFunctionBegin; 1445 if (!restore) { 1446 Mat A_IB,A_BI; 1447 PetscScalar *work; 1448 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1449 1450 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1451 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1452 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1453 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1454 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1455 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1456 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1457 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1458 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1459 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1460 ctx->apply_left = PETSC_TRUE; 1461 ctx->apply_right = PETSC_FALSE; 1462 ctx->apply_p0 = PETSC_FALSE; 1463 ctx->benign_n = pcbddc->benign_n; 1464 if (reuse) { 1465 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1466 ctx->free = PETSC_FALSE; 1467 } else { /* TODO: could be optimized for successive solves */ 1468 ISLocalToGlobalMapping N_to_D; 1469 PetscInt i; 1470 1471 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1472 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1473 for (i=0;i<pcbddc->benign_n;i++) { 1474 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1475 } 1476 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1477 ctx->free = PETSC_TRUE; 1478 } 1479 ctx->A = pcis->A_IB; 1480 ctx->work = work; 1481 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1482 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1483 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1484 pcis->A_IB = A_IB; 1485 1486 /* A_BI as A_IB^T */ 1487 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1488 pcbddc->benign_original_mat = pcis->A_BI; 1489 pcis->A_BI = A_BI; 1490 } else { 1491 if (!pcbddc->benign_original_mat) { 1492 PetscFunctionReturn(0); 1493 } 1494 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1495 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1496 pcis->A_IB = ctx->A; 1497 ctx->A = NULL; 1498 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1499 pcis->A_BI = pcbddc->benign_original_mat; 1500 pcbddc->benign_original_mat = NULL; 1501 if (ctx->free) { 1502 PetscInt i; 1503 for (i=0;i<ctx->benign_n;i++) { 1504 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1505 } 1506 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1507 } 1508 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1509 ierr = PetscFree(ctx);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 /* used just in bddc debug mode */ 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "PCBDDCBenignProject" 1517 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1518 { 1519 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1520 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1521 Mat An; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1526 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1527 if (is1) { 1528 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1529 ierr = MatDestroy(&An);CHKERRQ(ierr); 1530 } else { 1531 *B = An; 1532 } 1533 PetscFunctionReturn(0); 1534 } 1535 1536 /* TODO: add reuse flag */ 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatSeqAIJCompress" 1539 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1540 { 1541 Mat Bt; 1542 PetscScalar *a,*bdata; 1543 const PetscInt *ii,*ij; 1544 PetscInt m,n,i,nnz,*bii,*bij; 1545 PetscBool flg_row; 1546 PetscErrorCode ierr; 1547 1548 PetscFunctionBegin; 1549 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1550 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1551 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1552 nnz = n; 1553 for (i=0;i<ii[n];i++) { 1554 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1555 } 1556 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1557 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1558 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1559 nnz = 0; 1560 bii[0] = 0; 1561 for (i=0;i<n;i++) { 1562 PetscInt j; 1563 for (j=ii[i];j<ii[i+1];j++) { 1564 PetscScalar entry = a[j]; 1565 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1566 bij[nnz] = ij[j]; 1567 bdata[nnz] = entry; 1568 nnz++; 1569 } 1570 } 1571 bii[i+1] = nnz; 1572 } 1573 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1574 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1575 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1576 { 1577 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1578 b->free_a = PETSC_TRUE; 1579 b->free_ij = PETSC_TRUE; 1580 } 1581 *B = Bt; 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatDetectDisconnectedComponents" 1587 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1588 { 1589 Mat B; 1590 IS is_dummy,*cc_n; 1591 ISLocalToGlobalMapping l2gmap_dummy; 1592 PCBDDCGraph graph; 1593 PetscInt i,n; 1594 PetscInt *xadj,*adjncy; 1595 PetscInt *xadj_filtered,*adjncy_filtered; 1596 PetscBool flg_row,isseqaij; 1597 PetscErrorCode ierr; 1598 1599 PetscFunctionBegin; 1600 if (!A->rmap->N || !A->cmap->N) { 1601 *ncc = 0; 1602 *cc = NULL; 1603 PetscFunctionReturn(0); 1604 } 1605 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1606 if (!isseqaij && filter) { 1607 PetscBool isseqdense; 1608 1609 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1610 if (!isseqdense) { 1611 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1612 } else { /* TODO: rectangular case and LDA */ 1613 PetscScalar *array; 1614 PetscReal chop=1.e-6; 1615 1616 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1617 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1618 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1619 for (i=0;i<n;i++) { 1620 PetscInt j; 1621 for (j=i+1;j<n;j++) { 1622 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1623 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1624 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1625 } 1626 } 1627 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1628 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1629 } 1630 } else { 1631 B = A; 1632 } 1633 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1634 1635 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1636 if (filter) { 1637 PetscScalar *data; 1638 PetscInt j,cum; 1639 1640 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1641 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1642 cum = 0; 1643 for (i=0;i<n;i++) { 1644 PetscInt t; 1645 1646 for (j=xadj[i];j<xadj[i+1];j++) { 1647 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1648 continue; 1649 } 1650 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1651 } 1652 t = xadj_filtered[i]; 1653 xadj_filtered[i] = cum; 1654 cum += t; 1655 } 1656 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1657 } else { 1658 xadj_filtered = NULL; 1659 adjncy_filtered = NULL; 1660 } 1661 1662 /* compute local connected components using PCBDDCGraph */ 1663 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1664 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1665 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1666 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1667 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1668 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1669 if (xadj_filtered) { 1670 graph->xadj = xadj_filtered; 1671 graph->adjncy = adjncy_filtered; 1672 } else { 1673 graph->xadj = xadj; 1674 graph->adjncy = adjncy; 1675 } 1676 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1677 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1678 /* partial clean up */ 1679 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1680 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1681 if (A != B) { 1682 ierr = MatDestroy(&B);CHKERRQ(ierr); 1683 } 1684 1685 /* get back data */ 1686 if (ncc) *ncc = graph->ncc; 1687 if (cc) { 1688 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1689 for (i=0;i<graph->ncc;i++) { 1690 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1691 } 1692 *cc = cc_n; 1693 } 1694 /* clean up graph */ 1695 graph->xadj = 0; 1696 graph->adjncy = 0; 1697 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "PCBDDCBenignCheck" 1703 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1704 { 1705 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1706 PC_IS* pcis = (PC_IS*)(pc->data); 1707 IS dirIS = NULL; 1708 PetscInt i; 1709 PetscErrorCode ierr; 1710 1711 PetscFunctionBegin; 1712 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1713 if (zerodiag) { 1714 Mat A; 1715 Vec vec3_N; 1716 PetscScalar *vals; 1717 const PetscInt *idxs; 1718 PetscInt nz,*count; 1719 1720 /* p0 */ 1721 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1722 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1723 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1724 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1725 for (i=0;i<nz;i++) vals[i] = 1.; 1726 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1727 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1728 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1729 /* v_I */ 1730 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1731 for (i=0;i<nz;i++) vals[i] = 0.; 1732 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1733 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1734 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1735 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1736 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1737 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1738 if (dirIS) { 1739 PetscInt n; 1740 1741 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1742 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1743 for (i=0;i<n;i++) vals[i] = 0.; 1744 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1745 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1746 } 1747 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1748 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1749 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1750 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1751 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1752 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1753 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1754 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1755 ierr = PetscFree(vals);CHKERRQ(ierr); 1756 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1757 1758 /* there should not be any pressure dofs lying on the interface */ 1759 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1760 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1761 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1762 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1763 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1764 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1765 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1766 ierr = PetscFree(count);CHKERRQ(ierr); 1767 } 1768 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1769 1770 /* check PCBDDCBenignGetOrSetP0 */ 1771 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1772 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1773 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1774 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1775 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1776 for (i=0;i<pcbddc->benign_n;i++) { 1777 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1778 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1779 } 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1785 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1786 { 1787 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1788 IS pressures,zerodiag,*zerodiag_subs; 1789 PetscInt nz,n; 1790 PetscInt *interior_dofs,n_interior_dofs; 1791 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1792 PetscErrorCode ierr; 1793 1794 PetscFunctionBegin; 1795 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1796 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1797 for (n=0;n<pcbddc->benign_n;n++) { 1798 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1799 } 1800 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1801 pcbddc->benign_n = 0; 1802 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1803 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1804 Checks if all the pressure dofs in each subdomain have a zero diagonal 1805 If not, a change of basis on pressures is not needed 1806 since the local Schur complements are already SPD 1807 */ 1808 has_null_pressures = PETSC_TRUE; 1809 have_null = PETSC_TRUE; 1810 if (pcbddc->n_ISForDofsLocal) { 1811 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1812 1813 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1814 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1815 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1816 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1817 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1818 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1819 if (!sorted) { 1820 ierr = ISSort(pressures);CHKERRQ(ierr); 1821 } 1822 } else { 1823 pressures = NULL; 1824 } 1825 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1826 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1827 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1828 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1829 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1830 if (!sorted) { 1831 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1832 } 1833 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1834 if (!nz) { 1835 if (n) have_null = PETSC_FALSE; 1836 has_null_pressures = PETSC_FALSE; 1837 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1838 } 1839 recompute_zerodiag = PETSC_FALSE; 1840 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1841 zerodiag_subs = NULL; 1842 pcbddc->benign_n = 0; 1843 n_interior_dofs = 0; 1844 interior_dofs = NULL; 1845 if (pcbddc->current_level) { /* need to compute interior nodes */ 1846 PetscInt n,i,j; 1847 PetscInt n_neigh,*neigh,*n_shared,**shared; 1848 PetscInt *iwork; 1849 1850 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1851 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1852 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1853 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1854 for (i=1;i<n_neigh;i++) 1855 for (j=0;j<n_shared[i];j++) 1856 iwork[shared[i][j]] += 1; 1857 for (i=0;i<n;i++) 1858 if (!iwork[i]) 1859 interior_dofs[n_interior_dofs++] = i; 1860 ierr = PetscFree(iwork);CHKERRQ(ierr); 1861 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1862 } 1863 if (has_null_pressures) { 1864 IS *subs; 1865 PetscInt nsubs,i,j,nl; 1866 const PetscInt *idxs; 1867 PetscScalar *array; 1868 Vec *work; 1869 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1870 1871 subs = pcbddc->local_subs; 1872 nsubs = pcbddc->n_local_subs; 1873 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1874 if (pcbddc->current_level) { 1875 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1876 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1877 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1878 /* work[0] = 1_p */ 1879 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1880 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1881 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1882 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1883 /* work[0] = 1_v */ 1884 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1885 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1886 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1887 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1888 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1889 } 1890 if (nsubs > 1) { 1891 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1892 for (i=0;i<nsubs;i++) { 1893 ISLocalToGlobalMapping l2g; 1894 IS t_zerodiag_subs; 1895 PetscInt nl; 1896 1897 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1898 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1899 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1900 if (nl) { 1901 PetscBool valid = PETSC_TRUE; 1902 1903 if (pcbddc->current_level) { 1904 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1905 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1906 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1907 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1908 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1909 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1910 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1911 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1912 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1913 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1914 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1915 for (j=0;j<n_interior_dofs;j++) { 1916 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1917 valid = PETSC_FALSE; 1918 break; 1919 } 1920 } 1921 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1922 } 1923 if (valid && pcbddc->NeumannBoundariesLocal) { 1924 IS t_bc; 1925 PetscInt nzb; 1926 1927 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1928 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1929 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1930 if (nzb) valid = PETSC_FALSE; 1931 } 1932 if (valid && pressures) { 1933 IS t_pressure_subs; 1934 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1935 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1936 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1937 } 1938 if (valid) { 1939 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1940 pcbddc->benign_n++; 1941 } else { 1942 recompute_zerodiag = PETSC_TRUE; 1943 } 1944 } 1945 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1946 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1947 } 1948 } else { /* there's just one subdomain (or zero if they have not been detected */ 1949 PetscBool valid = PETSC_TRUE; 1950 1951 if (pcbddc->NeumannBoundariesLocal) { 1952 PetscInt nzb; 1953 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1954 if (nzb) valid = PETSC_FALSE; 1955 } 1956 if (valid && pressures) { 1957 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1958 } 1959 if (valid && pcbddc->current_level) { 1960 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1961 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1962 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1963 for (j=0;j<n_interior_dofs;j++) { 1964 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1965 valid = PETSC_FALSE; 1966 break; 1967 } 1968 } 1969 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1970 } 1971 if (valid) { 1972 pcbddc->benign_n = 1; 1973 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1974 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1975 zerodiag_subs[0] = zerodiag; 1976 } 1977 } 1978 if (pcbddc->current_level) { 1979 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1980 } 1981 } 1982 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1983 1984 if (!pcbddc->benign_n) { 1985 PetscInt n; 1986 1987 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1988 recompute_zerodiag = PETSC_FALSE; 1989 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1990 if (n) { 1991 has_null_pressures = PETSC_FALSE; 1992 have_null = PETSC_FALSE; 1993 } 1994 } 1995 1996 /* final check for null pressures */ 1997 if (zerodiag && pressures) { 1998 PetscInt nz,np; 1999 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2000 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2001 if (nz != np) have_null = PETSC_FALSE; 2002 } 2003 2004 if (recompute_zerodiag) { 2005 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2006 if (pcbddc->benign_n == 1) { 2007 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2008 zerodiag = zerodiag_subs[0]; 2009 } else { 2010 PetscInt i,nzn,*new_idxs; 2011 2012 nzn = 0; 2013 for (i=0;i<pcbddc->benign_n;i++) { 2014 PetscInt ns; 2015 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2016 nzn += ns; 2017 } 2018 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2019 nzn = 0; 2020 for (i=0;i<pcbddc->benign_n;i++) { 2021 PetscInt ns,*idxs; 2022 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2023 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2024 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2025 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2026 nzn += ns; 2027 } 2028 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2029 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2030 } 2031 have_null = PETSC_FALSE; 2032 } 2033 2034 /* Prepare matrix to compute no-net-flux */ 2035 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2036 Mat A,loc_divudotp; 2037 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2038 IS row,col,isused = NULL; 2039 PetscInt M,N,n,st,n_isused; 2040 2041 if (pressures) { 2042 isused = pressures; 2043 } else { 2044 isused = zerodiag; 2045 } 2046 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2047 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2048 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2049 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2050 n_isused = 0; 2051 if (isused) { 2052 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2053 } 2054 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2055 st = st-n_isused; 2056 if (n) { 2057 const PetscInt *gidxs; 2058 2059 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2060 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2061 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2062 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2063 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2064 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2065 } else { 2066 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2067 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2068 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2069 } 2070 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2071 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2072 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2073 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2074 ierr = ISDestroy(&row);CHKERRQ(ierr); 2075 ierr = ISDestroy(&col);CHKERRQ(ierr); 2076 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2077 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2078 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2079 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2080 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2081 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2082 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2083 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2084 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2085 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2086 } 2087 2088 /* change of basis and p0 dofs */ 2089 if (has_null_pressures) { 2090 IS zerodiagc; 2091 const PetscInt *idxs,*idxsc; 2092 PetscInt i,s,*nnz; 2093 2094 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2095 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2096 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2097 /* local change of basis for pressures */ 2098 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2099 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2100 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2101 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2102 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2103 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2104 for (i=0;i<pcbddc->benign_n;i++) { 2105 PetscInt nzs,j; 2106 2107 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2108 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2109 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2110 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2111 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2112 } 2113 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2114 ierr = PetscFree(nnz);CHKERRQ(ierr); 2115 /* set identity on velocities */ 2116 for (i=0;i<n-nz;i++) { 2117 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2118 } 2119 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2120 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2121 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2122 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2123 /* set change on pressures */ 2124 for (s=0;s<pcbddc->benign_n;s++) { 2125 PetscScalar *array; 2126 PetscInt nzs; 2127 2128 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2129 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2130 for (i=0;i<nzs-1;i++) { 2131 PetscScalar vals[2]; 2132 PetscInt cols[2]; 2133 2134 cols[0] = idxs[i]; 2135 cols[1] = idxs[nzs-1]; 2136 vals[0] = 1.; 2137 vals[1] = 1.; 2138 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2139 } 2140 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2141 for (i=0;i<nzs-1;i++) array[i] = -1.; 2142 array[nzs-1] = 1.; 2143 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2144 /* store local idxs for p0 */ 2145 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2146 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2147 ierr = PetscFree(array);CHKERRQ(ierr); 2148 } 2149 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2150 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2151 /* project if needed */ 2152 if (pcbddc->benign_change_explicit) { 2153 Mat M; 2154 2155 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2156 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2157 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2158 ierr = MatDestroy(&M);CHKERRQ(ierr); 2159 } 2160 /* store global idxs for p0 */ 2161 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2162 } 2163 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2164 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2165 2166 /* determines if the coarse solver will be singular or not */ 2167 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2168 /* determines if the problem has subdomains with 0 pressure block */ 2169 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2170 *zerodiaglocal = zerodiag; 2171 PetscFunctionReturn(0); 2172 } 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2176 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2177 { 2178 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2179 PetscScalar *array; 2180 PetscErrorCode ierr; 2181 2182 PetscFunctionBegin; 2183 if (!pcbddc->benign_sf) { 2184 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2185 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2186 } 2187 if (get) { 2188 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2189 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2190 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2191 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2192 } else { 2193 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2194 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2195 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2196 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2197 } 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2203 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2204 { 2205 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2206 PetscErrorCode ierr; 2207 2208 PetscFunctionBegin; 2209 /* TODO: add error checking 2210 - avoid nested pop (or push) calls. 2211 - cannot push before pop. 2212 - cannot call this if pcbddc->local_mat is NULL 2213 */ 2214 if (!pcbddc->benign_n) { 2215 PetscFunctionReturn(0); 2216 } 2217 if (pop) { 2218 if (pcbddc->benign_change_explicit) { 2219 IS is_p0; 2220 MatReuse reuse; 2221 2222 /* extract B_0 */ 2223 reuse = MAT_INITIAL_MATRIX; 2224 if (pcbddc->benign_B0) { 2225 reuse = MAT_REUSE_MATRIX; 2226 } 2227 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2228 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2229 /* remove rows and cols from local problem */ 2230 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2231 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2232 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2233 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2234 } else { 2235 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2236 PetscScalar *vals; 2237 PetscInt i,n,*idxs_ins; 2238 2239 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2240 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2241 if (!pcbddc->benign_B0) { 2242 PetscInt *nnz; 2243 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2244 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2245 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2246 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2247 for (i=0;i<pcbddc->benign_n;i++) { 2248 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2249 nnz[i] = n - nnz[i]; 2250 } 2251 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2252 ierr = PetscFree(nnz);CHKERRQ(ierr); 2253 } 2254 2255 for (i=0;i<pcbddc->benign_n;i++) { 2256 PetscScalar *array; 2257 PetscInt *idxs,j,nz,cum; 2258 2259 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2260 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2261 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2262 for (j=0;j<nz;j++) vals[j] = 1.; 2263 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2264 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2265 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2266 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2267 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2268 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2269 cum = 0; 2270 for (j=0;j<n;j++) { 2271 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2272 vals[cum] = array[j]; 2273 idxs_ins[cum] = j; 2274 cum++; 2275 } 2276 } 2277 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2278 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2279 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2280 } 2281 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2282 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2283 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2284 } 2285 } else { /* push */ 2286 if (pcbddc->benign_change_explicit) { 2287 PetscInt i; 2288 2289 for (i=0;i<pcbddc->benign_n;i++) { 2290 PetscScalar *B0_vals; 2291 PetscInt *B0_cols,B0_ncol; 2292 2293 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2294 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2295 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2296 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2297 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2298 } 2299 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2300 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2301 } else { 2302 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2303 } 2304 } 2305 PetscFunctionReturn(0); 2306 } 2307 2308 #undef __FUNCT__ 2309 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2310 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2311 { 2312 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2313 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2314 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2315 PetscBLASInt *B_iwork,*B_ifail; 2316 PetscScalar *work,lwork; 2317 PetscScalar *St,*S,*eigv; 2318 PetscScalar *Sarray,*Starray; 2319 PetscReal *eigs,thresh; 2320 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2321 PetscBool allocated_S_St; 2322 #if defined(PETSC_USE_COMPLEX) 2323 PetscReal *rwork; 2324 #endif 2325 PetscErrorCode ierr; 2326 2327 PetscFunctionBegin; 2328 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2329 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2330 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2331 2332 if (pcbddc->dbg_flag) { 2333 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2335 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2336 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2337 } 2338 2339 if (pcbddc->dbg_flag) { 2340 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2341 } 2342 2343 /* max size of subsets */ 2344 mss = 0; 2345 for (i=0;i<sub_schurs->n_subs;i++) { 2346 PetscInt subset_size; 2347 2348 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2349 mss = PetscMax(mss,subset_size); 2350 } 2351 2352 /* min/max and threshold */ 2353 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2354 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2355 nmax = PetscMax(nmin,nmax); 2356 allocated_S_St = PETSC_FALSE; 2357 if (nmin) { 2358 allocated_S_St = PETSC_TRUE; 2359 } 2360 2361 /* allocate lapack workspace */ 2362 cum = cum2 = 0; 2363 maxneigs = 0; 2364 for (i=0;i<sub_schurs->n_subs;i++) { 2365 PetscInt n,subset_size; 2366 2367 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2368 n = PetscMin(subset_size,nmax); 2369 cum += subset_size; 2370 cum2 += subset_size*n; 2371 maxneigs = PetscMax(maxneigs,n); 2372 } 2373 if (mss) { 2374 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2375 PetscBLASInt B_itype = 1; 2376 PetscBLASInt B_N = mss; 2377 PetscReal zero = 0.0; 2378 PetscReal eps = 0.0; /* dlamch? */ 2379 2380 B_lwork = -1; 2381 S = NULL; 2382 St = NULL; 2383 eigs = NULL; 2384 eigv = NULL; 2385 B_iwork = NULL; 2386 B_ifail = NULL; 2387 #if defined(PETSC_USE_COMPLEX) 2388 rwork = NULL; 2389 #endif 2390 thresh = 1.0; 2391 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2392 #if defined(PETSC_USE_COMPLEX) 2393 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2394 #else 2395 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2396 #endif 2397 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2398 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2399 } else { 2400 /* TODO */ 2401 } 2402 } else { 2403 lwork = 0; 2404 } 2405 2406 nv = 0; 2407 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2408 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2409 } 2410 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2411 if (allocated_S_St) { 2412 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2413 } 2414 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2415 #if defined(PETSC_USE_COMPLEX) 2416 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2417 #endif 2418 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2419 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2420 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2421 nv+cum,&pcbddc->adaptive_constraints_idxs, 2422 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2423 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2424 2425 maxneigs = 0; 2426 cum = cumarray = 0; 2427 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2428 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2429 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2430 const PetscInt *idxs; 2431 2432 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2433 for (cum=0;cum<nv;cum++) { 2434 pcbddc->adaptive_constraints_n[cum] = 1; 2435 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2436 pcbddc->adaptive_constraints_data[cum] = 1.0; 2437 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2438 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2439 } 2440 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2441 } 2442 2443 if (mss) { /* multilevel */ 2444 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2445 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2446 } 2447 2448 thresh = pcbddc->adaptive_threshold; 2449 for (i=0;i<sub_schurs->n_subs;i++) { 2450 const PetscInt *idxs; 2451 PetscReal upper,lower; 2452 PetscInt j,subset_size,eigs_start = 0; 2453 PetscBLASInt B_N; 2454 PetscBool same_data = PETSC_FALSE; 2455 2456 if (pcbddc->use_deluxe_scaling) { 2457 upper = PETSC_MAX_REAL; 2458 lower = thresh; 2459 } else { 2460 upper = 1./thresh; 2461 lower = 0.; 2462 } 2463 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2464 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2465 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2466 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2467 if (sub_schurs->is_hermitian) { 2468 PetscInt j,k; 2469 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2470 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2471 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2472 } 2473 for (j=0;j<subset_size;j++) { 2474 for (k=j;k<subset_size;k++) { 2475 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2476 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2477 } 2478 } 2479 } else { 2480 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2481 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2482 } 2483 } else { 2484 S = Sarray + cumarray; 2485 St = Starray + cumarray; 2486 } 2487 /* see if we can save some work */ 2488 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2489 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2490 } 2491 2492 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2493 B_neigs = 0; 2494 } else { 2495 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2496 PetscBLASInt B_itype = 1; 2497 PetscBLASInt B_IL, B_IU; 2498 PetscReal eps = -1.0; /* dlamch? */ 2499 PetscInt nmin_s; 2500 PetscBool compute_range = PETSC_FALSE; 2501 2502 if (pcbddc->dbg_flag) { 2503 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2504 } 2505 2506 compute_range = PETSC_FALSE; 2507 if (thresh > 1.+PETSC_SMALL && !same_data) { 2508 compute_range = PETSC_TRUE; 2509 } 2510 2511 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2512 if (compute_range) { 2513 2514 /* ask for eigenvalues larger than thresh */ 2515 #if defined(PETSC_USE_COMPLEX) 2516 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2517 #else 2518 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2519 #endif 2520 } else if (!same_data) { 2521 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2522 B_IL = 1; 2523 #if defined(PETSC_USE_COMPLEX) 2524 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2525 #else 2526 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2527 #endif 2528 } else { /* same_data is true, so get the adaptive function requested by the user */ 2529 PetscInt k; 2530 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2531 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2532 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2533 nmin = nmax; 2534 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2535 for (k=0;k<nmax;k++) { 2536 eigs[k] = 1./PETSC_SMALL; 2537 eigv[k*(subset_size+1)] = 1.0; 2538 } 2539 } 2540 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2541 if (B_ierr) { 2542 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2543 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2544 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2545 } 2546 2547 if (B_neigs > nmax) { 2548 if (pcbddc->dbg_flag) { 2549 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2550 } 2551 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2552 B_neigs = nmax; 2553 } 2554 2555 nmin_s = PetscMin(nmin,B_N); 2556 if (B_neigs < nmin_s) { 2557 PetscBLASInt B_neigs2; 2558 2559 if (pcbddc->use_deluxe_scaling) { 2560 B_IL = B_N - nmin_s + 1; 2561 B_IU = B_N - B_neigs; 2562 } else { 2563 B_IL = B_neigs + 1; 2564 B_IU = nmin_s; 2565 } 2566 if (pcbddc->dbg_flag) { 2567 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2568 } 2569 if (sub_schurs->is_hermitian) { 2570 PetscInt j,k; 2571 for (j=0;j<subset_size;j++) { 2572 for (k=j;k<subset_size;k++) { 2573 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2574 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2575 } 2576 } 2577 } else { 2578 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2579 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2580 } 2581 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2582 #if defined(PETSC_USE_COMPLEX) 2583 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2584 #else 2585 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2586 #endif 2587 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2588 B_neigs += B_neigs2; 2589 } 2590 if (B_ierr) { 2591 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2592 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2593 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2594 } 2595 if (pcbddc->dbg_flag) { 2596 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2597 for (j=0;j<B_neigs;j++) { 2598 if (eigs[j] == 0.0) { 2599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2600 } else { 2601 if (pcbddc->use_deluxe_scaling) { 2602 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2603 } else { 2604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2605 } 2606 } 2607 } 2608 } 2609 } else { 2610 /* TODO */ 2611 } 2612 } 2613 /* change the basis back to the original one */ 2614 if (sub_schurs->change) { 2615 Mat change,phi,phit; 2616 2617 if (pcbddc->dbg_flag > 1) { 2618 PetscInt ii; 2619 for (ii=0;ii<B_neigs;ii++) { 2620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2621 for (j=0;j<B_N;j++) { 2622 #if defined(PETSC_USE_COMPLEX) 2623 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2624 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2626 #else 2627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2628 #endif 2629 } 2630 } 2631 } 2632 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2633 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2634 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2635 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2636 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2637 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2638 } 2639 maxneigs = PetscMax(B_neigs,maxneigs); 2640 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2641 if (B_neigs) { 2642 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2643 2644 if (pcbddc->dbg_flag > 1) { 2645 PetscInt ii; 2646 for (ii=0;ii<B_neigs;ii++) { 2647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2648 for (j=0;j<B_N;j++) { 2649 #if defined(PETSC_USE_COMPLEX) 2650 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2651 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2652 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2653 #else 2654 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2655 #endif 2656 } 2657 } 2658 } 2659 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2660 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2661 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2662 cum++; 2663 } 2664 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2665 /* shift for next computation */ 2666 cumarray += subset_size*subset_size; 2667 } 2668 if (pcbddc->dbg_flag) { 2669 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2670 } 2671 2672 if (mss) { 2673 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2674 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2675 /* destroy matrices (junk) */ 2676 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2677 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2678 } 2679 if (allocated_S_St) { 2680 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2681 } 2682 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2683 #if defined(PETSC_USE_COMPLEX) 2684 ierr = PetscFree(rwork);CHKERRQ(ierr); 2685 #endif 2686 if (pcbddc->dbg_flag) { 2687 PetscInt maxneigs_r; 2688 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2689 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2690 } 2691 PetscFunctionReturn(0); 2692 } 2693 2694 #undef __FUNCT__ 2695 #define __FUNCT__ "PCBDDCSetUpSolvers" 2696 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2697 { 2698 PetscScalar *coarse_submat_vals; 2699 PetscErrorCode ierr; 2700 2701 PetscFunctionBegin; 2702 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2703 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2704 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2705 2706 /* Setup local neumann solver ksp_R */ 2707 /* PCBDDCSetUpLocalScatters should be called first! */ 2708 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2709 2710 /* 2711 Setup local correction and local part of coarse basis. 2712 Gives back the dense local part of the coarse matrix in column major ordering 2713 */ 2714 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2715 2716 /* Compute total number of coarse nodes and setup coarse solver */ 2717 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2718 2719 /* free */ 2720 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2721 PetscFunctionReturn(0); 2722 } 2723 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "PCBDDCResetCustomization" 2726 PetscErrorCode PCBDDCResetCustomization(PC pc) 2727 { 2728 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2733 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2734 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2735 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2736 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2737 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2738 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2739 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2740 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2741 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2742 PetscFunctionReturn(0); 2743 } 2744 2745 #undef __FUNCT__ 2746 #define __FUNCT__ "PCBDDCResetTopography" 2747 PetscErrorCode PCBDDCResetTopography(PC pc) 2748 { 2749 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2750 PetscInt i; 2751 PetscErrorCode ierr; 2752 2753 PetscFunctionBegin; 2754 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2755 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2756 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2757 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2758 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2759 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2760 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2761 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2762 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2763 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2764 for (i=0;i<pcbddc->n_local_subs;i++) { 2765 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2766 } 2767 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2768 if (pcbddc->sub_schurs) { 2769 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2770 } 2771 pcbddc->graphanalyzed = PETSC_FALSE; 2772 pcbddc->recompute_topography = PETSC_TRUE; 2773 PetscFunctionReturn(0); 2774 } 2775 2776 #undef __FUNCT__ 2777 #define __FUNCT__ "PCBDDCResetSolvers" 2778 PetscErrorCode PCBDDCResetSolvers(PC pc) 2779 { 2780 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2781 PetscErrorCode ierr; 2782 2783 PetscFunctionBegin; 2784 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2785 if (pcbddc->coarse_phi_B) { 2786 PetscScalar *array; 2787 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2788 ierr = PetscFree(array);CHKERRQ(ierr); 2789 } 2790 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2791 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2792 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2793 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2794 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2795 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2796 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2797 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2798 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2799 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2800 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2801 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2802 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2803 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2804 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2805 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2806 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2807 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2808 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2809 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2810 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2811 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2812 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2813 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2814 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2815 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2816 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2817 if (pcbddc->benign_zerodiag_subs) { 2818 PetscInt i; 2819 for (i=0;i<pcbddc->benign_n;i++) { 2820 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2821 } 2822 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2823 } 2824 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828 #undef __FUNCT__ 2829 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2830 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2831 { 2832 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2833 PC_IS *pcis = (PC_IS*)pc->data; 2834 VecType impVecType; 2835 PetscInt n_constraints,n_R,old_size; 2836 PetscErrorCode ierr; 2837 2838 PetscFunctionBegin; 2839 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2840 /* get sizes */ 2841 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2842 n_R = pcis->n - pcbddc->n_vertices; 2843 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2844 /* local work vectors (try to avoid unneeded work)*/ 2845 /* R nodes */ 2846 old_size = -1; 2847 if (pcbddc->vec1_R) { 2848 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2849 } 2850 if (n_R != old_size) { 2851 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2852 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2853 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2854 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2855 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2856 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2857 } 2858 /* local primal dofs */ 2859 old_size = -1; 2860 if (pcbddc->vec1_P) { 2861 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2862 } 2863 if (pcbddc->local_primal_size != old_size) { 2864 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2865 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2866 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2867 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2868 } 2869 /* local explicit constraints */ 2870 old_size = -1; 2871 if (pcbddc->vec1_C) { 2872 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2873 } 2874 if (n_constraints && n_constraints != old_size) { 2875 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2876 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2877 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2878 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2879 } 2880 PetscFunctionReturn(0); 2881 } 2882 2883 #undef __FUNCT__ 2884 #define __FUNCT__ "PCBDDCSetUpCorrection" 2885 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2886 { 2887 PetscErrorCode ierr; 2888 /* pointers to pcis and pcbddc */ 2889 PC_IS* pcis = (PC_IS*)pc->data; 2890 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2891 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2892 /* submatrices of local problem */ 2893 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2894 /* submatrices of local coarse problem */ 2895 Mat S_VV,S_CV,S_VC,S_CC; 2896 /* working matrices */ 2897 Mat C_CR; 2898 /* additional working stuff */ 2899 PC pc_R; 2900 Mat F; 2901 Vec dummy_vec; 2902 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2903 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2904 PetscScalar *work; 2905 PetscInt *idx_V_B; 2906 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2907 PetscInt i,n_R,n_D,n_B; 2908 2909 /* some shortcuts to scalars */ 2910 PetscScalar one=1.0,m_one=-1.0; 2911 2912 PetscFunctionBegin; 2913 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2914 2915 /* Set Non-overlapping dimensions */ 2916 n_vertices = pcbddc->n_vertices; 2917 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2918 n_B = pcis->n_B; 2919 n_D = pcis->n - n_B; 2920 n_R = pcis->n - n_vertices; 2921 2922 /* vertices in boundary numbering */ 2923 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2924 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2925 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2926 2927 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2928 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2929 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2930 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2931 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2932 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2933 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2934 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2936 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2937 2938 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2939 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2940 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2941 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2942 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2943 lda_rhs = n_R; 2944 need_benign_correction = PETSC_FALSE; 2945 if (isLU || isILU || isCHOL) { 2946 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2947 } else if (sub_schurs && sub_schurs->reuse_solver) { 2948 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2949 MatFactorType type; 2950 2951 F = reuse_solver->F; 2952 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2953 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2954 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2955 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2956 } else { 2957 F = NULL; 2958 } 2959 2960 /* allocate workspace */ 2961 n = 0; 2962 if (n_constraints) { 2963 n += lda_rhs*n_constraints; 2964 } 2965 if (n_vertices) { 2966 n = PetscMax(2*lda_rhs*n_vertices,n); 2967 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2968 } 2969 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2970 2971 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2972 dummy_vec = NULL; 2973 if (need_benign_correction && lda_rhs != n_R && F) { 2974 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2975 } 2976 2977 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2978 if (n_constraints) { 2979 Mat M1,M2,M3,C_B; 2980 IS is_aux; 2981 PetscScalar *array,*array2; 2982 2983 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2984 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2985 2986 /* Extract constraints on R nodes: C_{CR} */ 2987 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2988 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2989 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2990 2991 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2992 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2993 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2994 for (i=0;i<n_constraints;i++) { 2995 const PetscScalar *row_cmat_values; 2996 const PetscInt *row_cmat_indices; 2997 PetscInt size_of_constraint,j; 2998 2999 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3000 for (j=0;j<size_of_constraint;j++) { 3001 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3002 } 3003 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3004 } 3005 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3006 if (F) { 3007 Mat B; 3008 3009 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3010 if (need_benign_correction) { 3011 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3012 3013 /* rhs is already zero on interior dofs, no need to change the rhs */ 3014 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3015 } 3016 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3017 if (need_benign_correction) { 3018 PetscScalar *marr; 3019 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3020 3021 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3022 if (lda_rhs != n_R) { 3023 for (i=0;i<n_constraints;i++) { 3024 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3025 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3026 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3027 } 3028 } else { 3029 for (i=0;i<n_constraints;i++) { 3030 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3031 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3032 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3033 } 3034 } 3035 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3036 } 3037 ierr = MatDestroy(&B);CHKERRQ(ierr); 3038 } else { 3039 PetscScalar *marr; 3040 3041 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3042 for (i=0;i<n_constraints;i++) { 3043 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3044 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3045 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3046 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3047 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3048 } 3049 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3050 } 3051 if (!pcbddc->switch_static) { 3052 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3053 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3054 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3055 for (i=0;i<n_constraints;i++) { 3056 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3057 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3058 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3059 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3060 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3061 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3062 } 3063 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3064 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3065 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3066 } else { 3067 if (lda_rhs != n_R) { 3068 IS dummy; 3069 3070 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3071 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3072 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3073 } else { 3074 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3075 pcbddc->local_auxmat2 = local_auxmat2_R; 3076 } 3077 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3078 } 3079 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3080 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3081 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3082 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3083 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3084 if (isCHOL) { 3085 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3086 } else { 3087 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3088 } 3089 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3090 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3091 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3092 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3093 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3094 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3095 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3096 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3097 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3098 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3099 } 3100 3101 /* Get submatrices from subdomain matrix */ 3102 if (n_vertices) { 3103 IS is_aux; 3104 3105 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3106 IS tis; 3107 3108 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3109 ierr = ISSort(tis);CHKERRQ(ierr); 3110 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3111 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3112 } else { 3113 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3114 } 3115 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3116 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3117 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3118 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3119 } 3120 3121 /* Matrix of coarse basis functions (local) */ 3122 if (pcbddc->coarse_phi_B) { 3123 PetscInt on_B,on_primal,on_D=n_D; 3124 if (pcbddc->coarse_phi_D) { 3125 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3126 } 3127 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3128 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3129 PetscScalar *marray; 3130 3131 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3132 ierr = PetscFree(marray);CHKERRQ(ierr); 3133 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3134 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3135 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3136 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3137 } 3138 } 3139 3140 if (!pcbddc->coarse_phi_B) { 3141 PetscScalar *marray; 3142 3143 n = n_B*pcbddc->local_primal_size; 3144 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3145 n += n_D*pcbddc->local_primal_size; 3146 } 3147 if (!pcbddc->symmetric_primal) { 3148 n *= 2; 3149 } 3150 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3151 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3152 n = n_B*pcbddc->local_primal_size; 3153 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3154 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3155 n += n_D*pcbddc->local_primal_size; 3156 } 3157 if (!pcbddc->symmetric_primal) { 3158 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3159 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3160 n = n_B*pcbddc->local_primal_size; 3161 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3162 } 3163 } else { 3164 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3165 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3166 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3167 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3168 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3169 } 3170 } 3171 } 3172 3173 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3174 p0_lidx_I = NULL; 3175 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3176 const PetscInt *idxs; 3177 3178 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3179 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3180 for (i=0;i<pcbddc->benign_n;i++) { 3181 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3182 } 3183 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3184 } 3185 3186 /* vertices */ 3187 if (n_vertices) { 3188 3189 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3190 3191 if (n_R) { 3192 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3193 PetscBLASInt B_N,B_one = 1; 3194 PetscScalar *x,*y; 3195 PetscBool isseqaij; 3196 3197 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3198 if (need_benign_correction) { 3199 ISLocalToGlobalMapping RtoN; 3200 IS is_p0; 3201 PetscInt *idxs_p0,n; 3202 3203 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3204 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3205 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3206 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3207 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3208 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3209 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3210 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3211 } 3212 3213 if (lda_rhs == n_R) { 3214 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3215 } else { 3216 PetscScalar *av,*array; 3217 const PetscInt *xadj,*adjncy; 3218 PetscInt n; 3219 PetscBool flg_row; 3220 3221 array = work+lda_rhs*n_vertices; 3222 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3223 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3224 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3225 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3226 for (i=0;i<n;i++) { 3227 PetscInt j; 3228 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3229 } 3230 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3231 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3232 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3233 } 3234 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3235 if (need_benign_correction) { 3236 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3237 PetscScalar *marr; 3238 3239 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3240 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3241 3242 | 0 0 0 | (V) 3243 L = | 0 0 -1 | (P-p0) 3244 | 0 0 -1 | (p0) 3245 3246 */ 3247 for (i=0;i<reuse_solver->benign_n;i++) { 3248 const PetscScalar *vals; 3249 const PetscInt *idxs,*idxs_zero; 3250 PetscInt n,j,nz; 3251 3252 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3253 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3254 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3255 for (j=0;j<n;j++) { 3256 PetscScalar val = vals[j]; 3257 PetscInt k,col = idxs[j]; 3258 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3259 } 3260 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3261 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3262 } 3263 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3264 } 3265 if (F) { 3266 /* need to correct the rhs */ 3267 if (need_benign_correction) { 3268 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3269 PetscScalar *marr; 3270 3271 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3272 if (lda_rhs != n_R) { 3273 for (i=0;i<n_vertices;i++) { 3274 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3275 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3276 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3277 } 3278 } else { 3279 for (i=0;i<n_vertices;i++) { 3280 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3281 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3282 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3283 } 3284 } 3285 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3286 } 3287 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3288 /* need to correct the solution */ 3289 if (need_benign_correction) { 3290 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3291 PetscScalar *marr; 3292 3293 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3294 if (lda_rhs != n_R) { 3295 for (i=0;i<n_vertices;i++) { 3296 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3297 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3298 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3299 } 3300 } else { 3301 for (i=0;i<n_vertices;i++) { 3302 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3303 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3304 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3305 } 3306 } 3307 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3308 } 3309 } else { 3310 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3311 for (i=0;i<n_vertices;i++) { 3312 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3313 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3314 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3315 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3316 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3317 } 3318 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3319 } 3320 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3321 /* S_VV and S_CV */ 3322 if (n_constraints) { 3323 Mat B; 3324 3325 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3326 for (i=0;i<n_vertices;i++) { 3327 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3328 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3329 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3330 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3331 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3332 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3333 } 3334 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3335 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3336 ierr = MatDestroy(&B);CHKERRQ(ierr); 3337 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3338 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3339 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3340 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3341 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3342 ierr = MatDestroy(&B);CHKERRQ(ierr); 3343 } 3344 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3345 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3346 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3347 } 3348 if (lda_rhs != n_R) { 3349 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3350 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3351 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3352 } 3353 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3354 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3355 if (need_benign_correction) { 3356 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3357 PetscScalar *marr,*sums; 3358 3359 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3360 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3361 for (i=0;i<reuse_solver->benign_n;i++) { 3362 const PetscScalar *vals; 3363 const PetscInt *idxs,*idxs_zero; 3364 PetscInt n,j,nz; 3365 3366 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3367 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3368 for (j=0;j<n_vertices;j++) { 3369 PetscInt k; 3370 sums[j] = 0.; 3371 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3372 } 3373 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3374 for (j=0;j<n;j++) { 3375 PetscScalar val = vals[j]; 3376 PetscInt k; 3377 for (k=0;k<n_vertices;k++) { 3378 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3379 } 3380 } 3381 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3382 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3383 } 3384 ierr = PetscFree(sums);CHKERRQ(ierr); 3385 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3386 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3387 } 3388 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3389 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3390 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3391 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3392 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3393 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3394 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3395 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3396 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3397 } else { 3398 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3399 } 3400 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3401 3402 /* coarse basis functions */ 3403 for (i=0;i<n_vertices;i++) { 3404 PetscScalar *y; 3405 3406 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3407 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3408 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3409 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3410 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3411 y[n_B*i+idx_V_B[i]] = 1.0; 3412 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3413 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3414 3415 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3416 PetscInt j; 3417 3418 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3419 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3420 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3421 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3422 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3423 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3424 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3425 } 3426 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3427 } 3428 /* if n_R == 0 the object is not destroyed */ 3429 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3430 } 3431 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3432 3433 if (n_constraints) { 3434 Mat B; 3435 3436 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3437 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3438 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3439 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3440 if (n_vertices) { 3441 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3442 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3443 } else { 3444 Mat S_VCt; 3445 3446 if (lda_rhs != n_R) { 3447 ierr = MatDestroy(&B);CHKERRQ(ierr); 3448 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3449 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3450 } 3451 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3452 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3453 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3454 } 3455 } 3456 ierr = MatDestroy(&B);CHKERRQ(ierr); 3457 /* coarse basis functions */ 3458 for (i=0;i<n_constraints;i++) { 3459 PetscScalar *y; 3460 3461 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3462 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3463 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3464 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3465 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3466 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3467 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3468 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3469 PetscInt j; 3470 3471 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3472 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3473 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3474 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3475 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3476 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3477 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3478 } 3479 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3480 } 3481 } 3482 if (n_constraints) { 3483 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3484 } 3485 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3486 3487 /* coarse matrix entries relative to B_0 */ 3488 if (pcbddc->benign_n) { 3489 Mat B0_B,B0_BPHI; 3490 IS is_dummy; 3491 PetscScalar *data; 3492 PetscInt j; 3493 3494 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3495 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3496 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3497 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3498 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3499 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3500 for (j=0;j<pcbddc->benign_n;j++) { 3501 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3502 for (i=0;i<pcbddc->local_primal_size;i++) { 3503 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3504 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3505 } 3506 } 3507 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3508 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3509 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3510 } 3511 3512 /* compute other basis functions for non-symmetric problems */ 3513 if (!pcbddc->symmetric_primal) { 3514 Mat B_V=NULL,B_C=NULL; 3515 PetscScalar *marray; 3516 3517 if (n_constraints) { 3518 Mat S_CCT,C_CRT; 3519 3520 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3521 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3522 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3523 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3524 if (n_vertices) { 3525 Mat S_VCT; 3526 3527 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3528 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3529 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3530 } 3531 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3532 } else { 3533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3534 } 3535 if (n_vertices && n_R) { 3536 PetscScalar *av,*marray; 3537 const PetscInt *xadj,*adjncy; 3538 PetscInt n; 3539 PetscBool flg_row; 3540 3541 /* B_V = B_V - A_VR^T */ 3542 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3543 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3544 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3545 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3546 for (i=0;i<n;i++) { 3547 PetscInt j; 3548 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3549 } 3550 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3551 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3552 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3553 } 3554 3555 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3556 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3557 for (i=0;i<n_vertices;i++) { 3558 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3559 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3560 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3561 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3562 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3563 } 3564 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3565 if (B_C) { 3566 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3567 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3568 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3569 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3570 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3571 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3572 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3573 } 3574 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3575 } 3576 /* coarse basis functions */ 3577 for (i=0;i<pcbddc->local_primal_size;i++) { 3578 PetscScalar *y; 3579 3580 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3581 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3582 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3583 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3584 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3585 if (i<n_vertices) { 3586 y[n_B*i+idx_V_B[i]] = 1.0; 3587 } 3588 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3589 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3590 3591 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3592 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3593 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3594 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3595 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3596 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3597 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3598 } 3599 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3600 } 3601 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3602 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3603 } 3604 /* free memory */ 3605 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3606 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3607 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3608 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3609 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3610 ierr = PetscFree(work);CHKERRQ(ierr); 3611 if (n_vertices) { 3612 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3613 } 3614 if (n_constraints) { 3615 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3616 } 3617 /* Checking coarse_sub_mat and coarse basis functios */ 3618 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3619 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3620 if (pcbddc->dbg_flag) { 3621 Mat coarse_sub_mat; 3622 Mat AUXMAT,TM1,TM2,TM3,TM4; 3623 Mat coarse_phi_D,coarse_phi_B; 3624 Mat coarse_psi_D,coarse_psi_B; 3625 Mat A_II,A_BB,A_IB,A_BI; 3626 Mat C_B,CPHI; 3627 IS is_dummy; 3628 Vec mones; 3629 MatType checkmattype=MATSEQAIJ; 3630 PetscReal real_value; 3631 3632 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3633 Mat A; 3634 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3635 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3636 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3637 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3638 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3639 ierr = MatDestroy(&A);CHKERRQ(ierr); 3640 } else { 3641 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3642 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3643 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3644 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3645 } 3646 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3647 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3648 if (!pcbddc->symmetric_primal) { 3649 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3650 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3651 } 3652 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3653 3654 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3655 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3656 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3657 if (!pcbddc->symmetric_primal) { 3658 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3659 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3660 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3661 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3662 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3664 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3665 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3666 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3667 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3668 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3669 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3670 } else { 3671 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3672 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3673 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3674 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3675 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3676 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3677 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3678 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3679 } 3680 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3681 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3682 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3683 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3684 if (pcbddc->benign_n) { 3685 Mat B0_B,B0_BPHI; 3686 PetscScalar *data,*data2; 3687 PetscInt j; 3688 3689 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3690 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3691 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3692 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3693 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3694 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3695 for (j=0;j<pcbddc->benign_n;j++) { 3696 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3697 for (i=0;i<pcbddc->local_primal_size;i++) { 3698 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3699 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3700 } 3701 } 3702 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3703 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3704 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3705 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3706 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3707 } 3708 #if 0 3709 { 3710 PetscViewer viewer; 3711 char filename[256]; 3712 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3713 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3714 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3715 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3716 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3717 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3718 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3719 if (save_change) { 3720 Mat phi_B; 3721 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3722 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3723 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3724 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3725 } else { 3726 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3727 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3728 } 3729 if (pcbddc->coarse_phi_D) { 3730 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3731 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3732 } 3733 if (pcbddc->coarse_psi_B) { 3734 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3735 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3736 } 3737 if (pcbddc->coarse_psi_D) { 3738 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3739 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3740 } 3741 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3742 } 3743 #endif 3744 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3745 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3746 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3747 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3748 3749 /* check constraints */ 3750 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3751 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3752 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3753 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3754 } else { 3755 PetscScalar *data; 3756 Mat tmat; 3757 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3758 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3759 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3760 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3761 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3762 } 3763 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3764 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3765 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3766 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3767 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3768 if (!pcbddc->symmetric_primal) { 3769 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3770 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3771 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3772 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3773 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3774 } 3775 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3776 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3777 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3778 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3779 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3780 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3781 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3782 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3783 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3784 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3785 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3786 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3787 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3788 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3789 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3790 if (!pcbddc->symmetric_primal) { 3791 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3792 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3793 } 3794 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3795 } 3796 /* get back data */ 3797 *coarse_submat_vals_n = coarse_submat_vals; 3798 PetscFunctionReturn(0); 3799 } 3800 3801 #undef __FUNCT__ 3802 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3803 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3804 { 3805 Mat *work_mat; 3806 IS isrow_s,iscol_s; 3807 PetscBool rsorted,csorted; 3808 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3809 PetscErrorCode ierr; 3810 3811 PetscFunctionBegin; 3812 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3813 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3814 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3815 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3816 3817 if (!rsorted) { 3818 const PetscInt *idxs; 3819 PetscInt *idxs_sorted,i; 3820 3821 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3822 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3823 for (i=0;i<rsize;i++) { 3824 idxs_perm_r[i] = i; 3825 } 3826 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3827 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3828 for (i=0;i<rsize;i++) { 3829 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3830 } 3831 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3832 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3833 } else { 3834 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3835 isrow_s = isrow; 3836 } 3837 3838 if (!csorted) { 3839 if (isrow == iscol) { 3840 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3841 iscol_s = isrow_s; 3842 } else { 3843 const PetscInt *idxs; 3844 PetscInt *idxs_sorted,i; 3845 3846 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3847 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3848 for (i=0;i<csize;i++) { 3849 idxs_perm_c[i] = i; 3850 } 3851 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3852 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3853 for (i=0;i<csize;i++) { 3854 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3855 } 3856 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3857 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3858 } 3859 } else { 3860 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3861 iscol_s = iscol; 3862 } 3863 3864 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3865 3866 if (!rsorted || !csorted) { 3867 Mat new_mat; 3868 IS is_perm_r,is_perm_c; 3869 3870 if (!rsorted) { 3871 PetscInt *idxs_r,i; 3872 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3873 for (i=0;i<rsize;i++) { 3874 idxs_r[idxs_perm_r[i]] = i; 3875 } 3876 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3877 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3878 } else { 3879 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3880 } 3881 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3882 3883 if (!csorted) { 3884 if (isrow_s == iscol_s) { 3885 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3886 is_perm_c = is_perm_r; 3887 } else { 3888 PetscInt *idxs_c,i; 3889 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 3890 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3891 for (i=0;i<csize;i++) { 3892 idxs_c[idxs_perm_c[i]] = i; 3893 } 3894 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3895 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3896 } 3897 } else { 3898 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3899 } 3900 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3901 3902 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3903 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3904 work_mat[0] = new_mat; 3905 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3906 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3907 } 3908 3909 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3910 *B = work_mat[0]; 3911 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3912 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3913 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3914 PetscFunctionReturn(0); 3915 } 3916 3917 #undef __FUNCT__ 3918 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3919 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3920 { 3921 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3922 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3923 Mat new_mat; 3924 IS is_local,is_global; 3925 PetscInt local_size; 3926 PetscBool isseqaij; 3927 PetscErrorCode ierr; 3928 3929 PetscFunctionBegin; 3930 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3931 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3932 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3933 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3934 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3935 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3936 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3937 3938 /* check */ 3939 if (pcbddc->dbg_flag) { 3940 Vec x,x_change; 3941 PetscReal error; 3942 3943 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3944 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3945 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3946 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3947 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3948 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3949 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3950 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3951 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3952 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3953 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3954 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3955 ierr = VecDestroy(&x);CHKERRQ(ierr); 3956 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3957 } 3958 3959 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3960 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3961 if (isseqaij) { 3962 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3963 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3964 } else { 3965 Mat work_mat; 3966 3967 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3968 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3969 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3970 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3971 } 3972 if (matis->A->symmetric_set) { 3973 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3974 #if !defined(PETSC_USE_COMPLEX) 3975 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3976 #endif 3977 } 3978 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3979 PetscFunctionReturn(0); 3980 } 3981 3982 #undef __FUNCT__ 3983 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3984 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3985 { 3986 PC_IS* pcis = (PC_IS*)(pc->data); 3987 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3988 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3989 PetscInt *idx_R_local=NULL; 3990 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3991 PetscInt vbs,bs; 3992 PetscBT bitmask=NULL; 3993 PetscErrorCode ierr; 3994 3995 PetscFunctionBegin; 3996 /* 3997 No need to setup local scatters if 3998 - primal space is unchanged 3999 AND 4000 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4001 AND 4002 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4003 */ 4004 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4005 PetscFunctionReturn(0); 4006 } 4007 /* destroy old objects */ 4008 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4009 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4010 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4011 /* Set Non-overlapping dimensions */ 4012 n_B = pcis->n_B; 4013 n_D = pcis->n - n_B; 4014 n_vertices = pcbddc->n_vertices; 4015 4016 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4017 4018 /* create auxiliary bitmask and allocate workspace */ 4019 if (!sub_schurs || !sub_schurs->reuse_solver) { 4020 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4021 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4022 for (i=0;i<n_vertices;i++) { 4023 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4024 } 4025 4026 for (i=0, n_R=0; i<pcis->n; i++) { 4027 if (!PetscBTLookup(bitmask,i)) { 4028 idx_R_local[n_R++] = i; 4029 } 4030 } 4031 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4032 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4033 4034 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4035 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4036 } 4037 4038 /* Block code */ 4039 vbs = 1; 4040 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4041 if (bs>1 && !(n_vertices%bs)) { 4042 PetscBool is_blocked = PETSC_TRUE; 4043 PetscInt *vary; 4044 if (!sub_schurs || !sub_schurs->reuse_solver) { 4045 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4046 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4047 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4048 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4049 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4050 for (i=0; i<pcis->n/bs; i++) { 4051 if (vary[i]!=0 && vary[i]!=bs) { 4052 is_blocked = PETSC_FALSE; 4053 break; 4054 } 4055 } 4056 ierr = PetscFree(vary);CHKERRQ(ierr); 4057 } else { 4058 /* Verify directly the R set */ 4059 for (i=0; i<n_R/bs; i++) { 4060 PetscInt j,node=idx_R_local[bs*i]; 4061 for (j=1; j<bs; j++) { 4062 if (node != idx_R_local[bs*i+j]-j) { 4063 is_blocked = PETSC_FALSE; 4064 break; 4065 } 4066 } 4067 } 4068 } 4069 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4070 vbs = bs; 4071 for (i=0;i<n_R/vbs;i++) { 4072 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4073 } 4074 } 4075 } 4076 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4077 if (sub_schurs && sub_schurs->reuse_solver) { 4078 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4079 4080 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4081 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4082 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4083 reuse_solver->is_R = pcbddc->is_R_local; 4084 } else { 4085 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4086 } 4087 4088 /* print some info if requested */ 4089 if (pcbddc->dbg_flag) { 4090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4091 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4092 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4093 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4095 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4096 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4097 } 4098 4099 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4100 if (!sub_schurs || !sub_schurs->reuse_solver) { 4101 IS is_aux1,is_aux2; 4102 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4103 4104 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4105 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4106 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4107 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4108 for (i=0; i<n_D; i++) { 4109 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4110 } 4111 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4112 for (i=0, j=0; i<n_R; i++) { 4113 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4114 aux_array1[j++] = i; 4115 } 4116 } 4117 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4118 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4119 for (i=0, j=0; i<n_B; i++) { 4120 if (!PetscBTLookup(bitmask,is_indices[i])) { 4121 aux_array2[j++] = i; 4122 } 4123 } 4124 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4125 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4126 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4127 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4128 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4129 4130 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4131 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4132 for (i=0, j=0; i<n_R; i++) { 4133 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4134 aux_array1[j++] = i; 4135 } 4136 } 4137 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4138 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4139 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4140 } 4141 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4142 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4143 } else { 4144 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4145 IS tis; 4146 PetscInt schur_size; 4147 4148 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4149 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4150 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4151 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4152 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4153 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4154 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4155 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4156 } 4157 } 4158 PetscFunctionReturn(0); 4159 } 4160 4161 4162 #undef __FUNCT__ 4163 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4164 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4165 { 4166 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4167 PC_IS *pcis = (PC_IS*)pc->data; 4168 PC pc_temp; 4169 Mat A_RR; 4170 MatReuse reuse; 4171 PetscScalar m_one = -1.0; 4172 PetscReal value; 4173 PetscInt n_D,n_R; 4174 PetscBool check_corr[2],issbaij; 4175 PetscErrorCode ierr; 4176 /* prefixes stuff */ 4177 char dir_prefix[256],neu_prefix[256],str_level[16]; 4178 size_t len; 4179 4180 PetscFunctionBegin; 4181 4182 /* compute prefixes */ 4183 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4184 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4185 if (!pcbddc->current_level) { 4186 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4187 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4188 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4189 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4190 } else { 4191 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4192 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4193 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4194 len -= 15; /* remove "pc_bddc_coarse_" */ 4195 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4196 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4197 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4198 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4199 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4200 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4201 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4202 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4203 } 4204 4205 /* DIRICHLET PROBLEM */ 4206 if (dirichlet) { 4207 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4208 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4209 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4210 if (pcbddc->dbg_flag) { 4211 Mat A_IIn; 4212 4213 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4214 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4215 pcis->A_II = A_IIn; 4216 } 4217 } 4218 if (pcbddc->local_mat->symmetric_set) { 4219 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4220 } 4221 /* Matrix for Dirichlet problem is pcis->A_II */ 4222 n_D = pcis->n - pcis->n_B; 4223 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4224 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4225 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4226 /* default */ 4227 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4228 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4229 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4230 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4231 if (issbaij) { 4232 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4233 } else { 4234 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4235 } 4236 /* Allow user's customization */ 4237 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4238 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4239 } 4240 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4241 if (sub_schurs && sub_schurs->reuse_solver) { 4242 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4243 4244 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4245 } 4246 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4247 if (!n_D) { 4248 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4249 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4250 } 4251 /* Set Up KSP for Dirichlet problem of BDDC */ 4252 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4253 /* set ksp_D into pcis data */ 4254 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4255 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4256 pcis->ksp_D = pcbddc->ksp_D; 4257 } 4258 4259 /* NEUMANN PROBLEM */ 4260 A_RR = 0; 4261 if (neumann) { 4262 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4263 PetscInt ibs,mbs; 4264 PetscBool issbaij; 4265 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4266 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4267 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4268 if (pcbddc->ksp_R) { /* already created ksp */ 4269 PetscInt nn_R; 4270 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4271 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4272 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4273 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4274 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4275 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4276 reuse = MAT_INITIAL_MATRIX; 4277 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4278 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4279 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4280 reuse = MAT_INITIAL_MATRIX; 4281 } else { /* safe to reuse the matrix */ 4282 reuse = MAT_REUSE_MATRIX; 4283 } 4284 } 4285 /* last check */ 4286 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4287 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4288 reuse = MAT_INITIAL_MATRIX; 4289 } 4290 } else { /* first time, so we need to create the matrix */ 4291 reuse = MAT_INITIAL_MATRIX; 4292 } 4293 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4294 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4295 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4296 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4297 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4298 if (matis->A == pcbddc->local_mat) { 4299 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4300 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4301 } else { 4302 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4303 } 4304 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4305 if (matis->A == pcbddc->local_mat) { 4306 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4307 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4308 } else { 4309 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4310 } 4311 } 4312 /* extract A_RR */ 4313 if (sub_schurs && sub_schurs->reuse_solver) { 4314 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4315 4316 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4317 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4318 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4319 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4320 } else { 4321 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4322 } 4323 } else { 4324 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4325 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4326 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4327 } 4328 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4329 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4330 } 4331 if (pcbddc->local_mat->symmetric_set) { 4332 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4333 } 4334 if (!pcbddc->ksp_R) { /* create object if not present */ 4335 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4336 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4337 /* default */ 4338 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4339 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4340 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4341 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4342 if (issbaij) { 4343 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4344 } else { 4345 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4346 } 4347 /* Allow user's customization */ 4348 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4349 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4350 } 4351 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4352 if (!n_R) { 4353 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4354 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4355 } 4356 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4357 /* Reuse solver if it is present */ 4358 if (sub_schurs && sub_schurs->reuse_solver) { 4359 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4360 4361 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4362 } 4363 /* Set Up KSP for Neumann problem of BDDC */ 4364 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4365 } 4366 4367 if (pcbddc->dbg_flag) { 4368 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4369 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4370 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4371 } 4372 4373 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4374 check_corr[0] = check_corr[1] = PETSC_FALSE; 4375 if (pcbddc->NullSpace_corr[0]) { 4376 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4377 } 4378 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4379 check_corr[0] = PETSC_TRUE; 4380 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4381 } 4382 if (neumann && pcbddc->NullSpace_corr[2]) { 4383 check_corr[1] = PETSC_TRUE; 4384 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4385 } 4386 4387 /* check Dirichlet and Neumann solvers */ 4388 if (pcbddc->dbg_flag) { 4389 if (dirichlet) { /* Dirichlet */ 4390 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4391 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4392 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4393 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4394 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4395 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4396 if (check_corr[0]) { 4397 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4398 } 4399 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4400 } 4401 if (neumann) { /* Neumann */ 4402 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4403 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4404 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4405 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4406 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4408 if (check_corr[1]) { 4409 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4410 } 4411 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4412 } 4413 } 4414 /* free Neumann problem's matrix */ 4415 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4416 PetscFunctionReturn(0); 4417 } 4418 4419 #undef __FUNCT__ 4420 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4421 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4422 { 4423 PetscErrorCode ierr; 4424 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4425 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4426 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4427 4428 PetscFunctionBegin; 4429 if (!reuse_solver) { 4430 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4431 } 4432 if (!pcbddc->switch_static) { 4433 if (applytranspose && pcbddc->local_auxmat1) { 4434 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4435 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4436 } 4437 if (!reuse_solver) { 4438 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4439 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4440 } else { 4441 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4442 4443 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4444 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4445 } 4446 } else { 4447 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4448 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4449 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4450 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4451 if (applytranspose && pcbddc->local_auxmat1) { 4452 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4453 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4454 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4455 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4456 } 4457 } 4458 if (!reuse_solver || pcbddc->switch_static) { 4459 if (applytranspose) { 4460 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4461 } else { 4462 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4463 } 4464 } else { 4465 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4466 4467 if (applytranspose) { 4468 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4469 } else { 4470 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4471 } 4472 } 4473 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4474 if (!pcbddc->switch_static) { 4475 if (!reuse_solver) { 4476 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4477 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4478 } else { 4479 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4480 4481 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4482 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4483 } 4484 if (!applytranspose && pcbddc->local_auxmat1) { 4485 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4486 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4487 } 4488 } else { 4489 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4490 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4491 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4492 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4493 if (!applytranspose && pcbddc->local_auxmat1) { 4494 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4495 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4496 } 4497 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4498 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4499 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4500 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4501 } 4502 PetscFunctionReturn(0); 4503 } 4504 4505 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4506 #undef __FUNCT__ 4507 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4508 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4509 { 4510 PetscErrorCode ierr; 4511 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4512 PC_IS* pcis = (PC_IS*) (pc->data); 4513 const PetscScalar zero = 0.0; 4514 4515 PetscFunctionBegin; 4516 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4517 if (!pcbddc->benign_apply_coarse_only) { 4518 if (applytranspose) { 4519 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4520 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4521 } else { 4522 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4523 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4524 } 4525 } else { 4526 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4527 } 4528 4529 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4530 if (pcbddc->benign_n) { 4531 PetscScalar *array; 4532 PetscInt j; 4533 4534 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4535 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4536 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4537 } 4538 4539 /* start communications from local primal nodes to rhs of coarse solver */ 4540 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4541 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4542 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4543 4544 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4545 if (pcbddc->coarse_ksp) { 4546 Mat coarse_mat; 4547 Vec rhs,sol; 4548 MatNullSpace nullsp; 4549 PetscBool isbddc = PETSC_FALSE; 4550 4551 if (pcbddc->benign_have_null) { 4552 PC coarse_pc; 4553 4554 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4555 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4556 /* we need to propagate to coarser levels the need for a possible benign correction */ 4557 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4558 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4559 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4560 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4561 } 4562 } 4563 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4564 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4565 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4566 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4567 if (nullsp) { 4568 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4569 } 4570 if (applytranspose) { 4571 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4572 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4573 } else { 4574 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4575 PC coarse_pc; 4576 4577 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4578 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4579 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4580 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4581 } else { 4582 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4583 } 4584 } 4585 /* we don't need the benign correction at coarser levels anymore */ 4586 if (pcbddc->benign_have_null && isbddc) { 4587 PC coarse_pc; 4588 PC_BDDC* coarsepcbddc; 4589 4590 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4591 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4592 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4593 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4594 } 4595 if (nullsp) { 4596 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4597 } 4598 } 4599 4600 /* Local solution on R nodes */ 4601 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4602 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4603 } 4604 /* communications from coarse sol to local primal nodes */ 4605 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4606 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4607 4608 /* Sum contributions from the two levels */ 4609 if (!pcbddc->benign_apply_coarse_only) { 4610 if (applytranspose) { 4611 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4612 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4613 } else { 4614 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4615 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4616 } 4617 /* store p0 */ 4618 if (pcbddc->benign_n) { 4619 PetscScalar *array; 4620 PetscInt j; 4621 4622 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4623 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4624 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4625 } 4626 } else { /* expand the coarse solution */ 4627 if (applytranspose) { 4628 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4629 } else { 4630 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4631 } 4632 } 4633 PetscFunctionReturn(0); 4634 } 4635 4636 #undef __FUNCT__ 4637 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4638 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4639 { 4640 PetscErrorCode ierr; 4641 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4642 PetscScalar *array; 4643 Vec from,to; 4644 4645 PetscFunctionBegin; 4646 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4647 from = pcbddc->coarse_vec; 4648 to = pcbddc->vec1_P; 4649 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4650 Vec tvec; 4651 4652 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4653 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4654 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4655 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4656 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4657 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4658 } 4659 } else { /* from local to global -> put data in coarse right hand side */ 4660 from = pcbddc->vec1_P; 4661 to = pcbddc->coarse_vec; 4662 } 4663 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4664 PetscFunctionReturn(0); 4665 } 4666 4667 #undef __FUNCT__ 4668 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4669 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4670 { 4671 PetscErrorCode ierr; 4672 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4673 PetscScalar *array; 4674 Vec from,to; 4675 4676 PetscFunctionBegin; 4677 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4678 from = pcbddc->coarse_vec; 4679 to = pcbddc->vec1_P; 4680 } else { /* from local to global -> put data in coarse right hand side */ 4681 from = pcbddc->vec1_P; 4682 to = pcbddc->coarse_vec; 4683 } 4684 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4685 if (smode == SCATTER_FORWARD) { 4686 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4687 Vec tvec; 4688 4689 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4690 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4691 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4692 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4693 } 4694 } else { 4695 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4696 ierr = VecResetArray(from);CHKERRQ(ierr); 4697 } 4698 } 4699 PetscFunctionReturn(0); 4700 } 4701 4702 /* uncomment for testing purposes */ 4703 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4704 #undef __FUNCT__ 4705 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4706 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4707 { 4708 PetscErrorCode ierr; 4709 PC_IS* pcis = (PC_IS*)(pc->data); 4710 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4711 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4712 /* one and zero */ 4713 PetscScalar one=1.0,zero=0.0; 4714 /* space to store constraints and their local indices */ 4715 PetscScalar *constraints_data; 4716 PetscInt *constraints_idxs,*constraints_idxs_B; 4717 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4718 PetscInt *constraints_n; 4719 /* iterators */ 4720 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4721 /* BLAS integers */ 4722 PetscBLASInt lwork,lierr; 4723 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4724 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4725 /* reuse */ 4726 PetscInt olocal_primal_size,olocal_primal_size_cc; 4727 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4728 /* change of basis */ 4729 PetscBool qr_needed; 4730 PetscBT change_basis,qr_needed_idx; 4731 /* auxiliary stuff */ 4732 PetscInt *nnz,*is_indices; 4733 PetscInt ncc; 4734 /* some quantities */ 4735 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4736 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4737 4738 PetscFunctionBegin; 4739 /* Destroy Mat objects computed previously */ 4740 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4741 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4742 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4743 /* save info on constraints from previous setup (if any) */ 4744 olocal_primal_size = pcbddc->local_primal_size; 4745 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4746 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4747 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4748 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4749 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4750 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4751 4752 if (!pcbddc->adaptive_selection) { 4753 IS ISForVertices,*ISForFaces,*ISForEdges; 4754 MatNullSpace nearnullsp; 4755 const Vec *nearnullvecs; 4756 Vec *localnearnullsp; 4757 PetscScalar *array; 4758 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4759 PetscBool nnsp_has_cnst; 4760 /* LAPACK working arrays for SVD or POD */ 4761 PetscBool skip_lapack,boolforchange; 4762 PetscScalar *work; 4763 PetscReal *singular_vals; 4764 #if defined(PETSC_USE_COMPLEX) 4765 PetscReal *rwork; 4766 #endif 4767 #if defined(PETSC_MISSING_LAPACK_GESVD) 4768 PetscScalar *temp_basis,*correlation_mat; 4769 #else 4770 PetscBLASInt dummy_int=1; 4771 PetscScalar dummy_scalar=1.; 4772 #endif 4773 4774 /* Get index sets for faces, edges and vertices from graph */ 4775 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4776 /* print some info */ 4777 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 4778 PetscInt nv; 4779 4780 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4781 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4782 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4783 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4785 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4786 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4787 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4788 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4789 } 4790 4791 /* free unneeded index sets */ 4792 if (!pcbddc->use_vertices) { 4793 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4794 } 4795 if (!pcbddc->use_edges) { 4796 for (i=0;i<n_ISForEdges;i++) { 4797 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4798 } 4799 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4800 n_ISForEdges = 0; 4801 } 4802 if (!pcbddc->use_faces) { 4803 for (i=0;i<n_ISForFaces;i++) { 4804 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4805 } 4806 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4807 n_ISForFaces = 0; 4808 } 4809 4810 /* check if near null space is attached to global mat */ 4811 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4812 if (nearnullsp) { 4813 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4814 /* remove any stored info */ 4815 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4816 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4817 /* store information for BDDC solver reuse */ 4818 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4819 pcbddc->onearnullspace = nearnullsp; 4820 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4821 for (i=0;i<nnsp_size;i++) { 4822 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4823 } 4824 } else { /* if near null space is not provided BDDC uses constants by default */ 4825 nnsp_size = 0; 4826 nnsp_has_cnst = PETSC_TRUE; 4827 } 4828 /* get max number of constraints on a single cc */ 4829 max_constraints = nnsp_size; 4830 if (nnsp_has_cnst) max_constraints++; 4831 4832 /* 4833 Evaluate maximum storage size needed by the procedure 4834 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4835 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4836 There can be multiple constraints per connected component 4837 */ 4838 n_vertices = 0; 4839 if (ISForVertices) { 4840 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4841 } 4842 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4843 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4844 4845 total_counts = n_ISForFaces+n_ISForEdges; 4846 total_counts *= max_constraints; 4847 total_counts += n_vertices; 4848 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4849 4850 total_counts = 0; 4851 max_size_of_constraint = 0; 4852 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4853 IS used_is; 4854 if (i<n_ISForEdges) { 4855 used_is = ISForEdges[i]; 4856 } else { 4857 used_is = ISForFaces[i-n_ISForEdges]; 4858 } 4859 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4860 total_counts += j; 4861 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4862 } 4863 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4864 4865 /* get local part of global near null space vectors */ 4866 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4867 for (k=0;k<nnsp_size;k++) { 4868 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4869 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4870 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4871 } 4872 4873 /* whether or not to skip lapack calls */ 4874 skip_lapack = PETSC_TRUE; 4875 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4876 4877 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4878 if (!skip_lapack) { 4879 PetscScalar temp_work; 4880 4881 #if defined(PETSC_MISSING_LAPACK_GESVD) 4882 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4883 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4884 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4885 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4886 #if defined(PETSC_USE_COMPLEX) 4887 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4888 #endif 4889 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4890 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4891 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4892 lwork = -1; 4893 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4894 #if !defined(PETSC_USE_COMPLEX) 4895 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4896 #else 4897 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4898 #endif 4899 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4900 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4901 #else /* on missing GESVD */ 4902 /* SVD */ 4903 PetscInt max_n,min_n; 4904 max_n = max_size_of_constraint; 4905 min_n = max_constraints; 4906 if (max_size_of_constraint < max_constraints) { 4907 min_n = max_size_of_constraint; 4908 max_n = max_constraints; 4909 } 4910 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4911 #if defined(PETSC_USE_COMPLEX) 4912 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4913 #endif 4914 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4915 lwork = -1; 4916 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4917 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4918 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4919 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4920 #if !defined(PETSC_USE_COMPLEX) 4921 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4922 #else 4923 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4924 #endif 4925 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4926 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4927 #endif /* on missing GESVD */ 4928 /* Allocate optimal workspace */ 4929 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4930 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4931 } 4932 /* Now we can loop on constraining sets */ 4933 total_counts = 0; 4934 constraints_idxs_ptr[0] = 0; 4935 constraints_data_ptr[0] = 0; 4936 /* vertices */ 4937 if (n_vertices) { 4938 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4939 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4940 for (i=0;i<n_vertices;i++) { 4941 constraints_n[total_counts] = 1; 4942 constraints_data[total_counts] = 1.0; 4943 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4944 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4945 total_counts++; 4946 } 4947 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4948 n_vertices = total_counts; 4949 } 4950 4951 /* edges and faces */ 4952 total_counts_cc = total_counts; 4953 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4954 IS used_is; 4955 PetscBool idxs_copied = PETSC_FALSE; 4956 4957 if (ncc<n_ISForEdges) { 4958 used_is = ISForEdges[ncc]; 4959 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4960 } else { 4961 used_is = ISForFaces[ncc-n_ISForEdges]; 4962 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4963 } 4964 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4965 4966 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4967 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4968 /* change of basis should not be performed on local periodic nodes */ 4969 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4970 if (nnsp_has_cnst) { 4971 PetscScalar quad_value; 4972 4973 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4974 idxs_copied = PETSC_TRUE; 4975 4976 if (!pcbddc->use_nnsp_true) { 4977 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4978 } else { 4979 quad_value = 1.0; 4980 } 4981 for (j=0;j<size_of_constraint;j++) { 4982 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4983 } 4984 temp_constraints++; 4985 total_counts++; 4986 } 4987 for (k=0;k<nnsp_size;k++) { 4988 PetscReal real_value; 4989 PetscScalar *ptr_to_data; 4990 4991 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4992 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4993 for (j=0;j<size_of_constraint;j++) { 4994 ptr_to_data[j] = array[is_indices[j]]; 4995 } 4996 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4997 /* check if array is null on the connected component */ 4998 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4999 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5000 if (real_value > 0.0) { /* keep indices and values */ 5001 temp_constraints++; 5002 total_counts++; 5003 if (!idxs_copied) { 5004 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5005 idxs_copied = PETSC_TRUE; 5006 } 5007 } 5008 } 5009 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5010 valid_constraints = temp_constraints; 5011 if (!pcbddc->use_nnsp_true && temp_constraints) { 5012 if (temp_constraints == 1) { /* just normalize the constraint */ 5013 PetscScalar norm,*ptr_to_data; 5014 5015 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5016 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5017 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5018 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5019 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5020 } else { /* perform SVD */ 5021 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5022 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5023 5024 #if defined(PETSC_MISSING_LAPACK_GESVD) 5025 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5026 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5027 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5028 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5029 from that computed using LAPACKgesvd 5030 -> This is due to a different computation of eigenvectors in LAPACKheev 5031 -> The quality of the POD-computed basis will be the same */ 5032 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5033 /* Store upper triangular part of correlation matrix */ 5034 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5035 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5036 for (j=0;j<temp_constraints;j++) { 5037 for (k=0;k<j+1;k++) { 5038 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5039 } 5040 } 5041 /* compute eigenvalues and eigenvectors of correlation matrix */ 5042 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5043 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5044 #if !defined(PETSC_USE_COMPLEX) 5045 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5046 #else 5047 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5048 #endif 5049 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5050 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5051 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5052 j = 0; 5053 while (j < temp_constraints && singular_vals[j] < tol) j++; 5054 total_counts = total_counts-j; 5055 valid_constraints = temp_constraints-j; 5056 /* scale and copy POD basis into used quadrature memory */ 5057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5058 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5059 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5061 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5062 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5063 if (j<temp_constraints) { 5064 PetscInt ii; 5065 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5066 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5067 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5068 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5069 for (k=0;k<temp_constraints-j;k++) { 5070 for (ii=0;ii<size_of_constraint;ii++) { 5071 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5072 } 5073 } 5074 } 5075 #else /* on missing GESVD */ 5076 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5077 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5078 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5079 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5080 #if !defined(PETSC_USE_COMPLEX) 5081 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5082 #else 5083 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5084 #endif 5085 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5086 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5087 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5088 k = temp_constraints; 5089 if (k > size_of_constraint) k = size_of_constraint; 5090 j = 0; 5091 while (j < k && singular_vals[k-j-1] < tol) j++; 5092 valid_constraints = k-j; 5093 total_counts = total_counts-temp_constraints+valid_constraints; 5094 #endif /* on missing GESVD */ 5095 } 5096 } 5097 /* update pointers information */ 5098 if (valid_constraints) { 5099 constraints_n[total_counts_cc] = valid_constraints; 5100 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5101 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5102 /* set change_of_basis flag */ 5103 if (boolforchange) { 5104 PetscBTSet(change_basis,total_counts_cc); 5105 } 5106 total_counts_cc++; 5107 } 5108 } 5109 /* free workspace */ 5110 if (!skip_lapack) { 5111 ierr = PetscFree(work);CHKERRQ(ierr); 5112 #if defined(PETSC_USE_COMPLEX) 5113 ierr = PetscFree(rwork);CHKERRQ(ierr); 5114 #endif 5115 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5116 #if defined(PETSC_MISSING_LAPACK_GESVD) 5117 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5118 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5119 #endif 5120 } 5121 for (k=0;k<nnsp_size;k++) { 5122 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5123 } 5124 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5125 /* free index sets of faces, edges and vertices */ 5126 for (i=0;i<n_ISForFaces;i++) { 5127 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5128 } 5129 if (n_ISForFaces) { 5130 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5131 } 5132 for (i=0;i<n_ISForEdges;i++) { 5133 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5134 } 5135 if (n_ISForEdges) { 5136 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5137 } 5138 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5139 } else { 5140 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5141 5142 total_counts = 0; 5143 n_vertices = 0; 5144 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5145 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5146 } 5147 max_constraints = 0; 5148 total_counts_cc = 0; 5149 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5150 total_counts += pcbddc->adaptive_constraints_n[i]; 5151 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5152 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5153 } 5154 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5155 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5156 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5157 constraints_data = pcbddc->adaptive_constraints_data; 5158 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5159 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5160 total_counts_cc = 0; 5161 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5162 if (pcbddc->adaptive_constraints_n[i]) { 5163 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5164 } 5165 } 5166 #if 0 5167 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5168 for (i=0;i<total_counts_cc;i++) { 5169 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5170 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5171 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5172 printf(" %d",constraints_idxs[j]); 5173 } 5174 printf("\n"); 5175 printf("number of cc: %d\n",constraints_n[i]); 5176 } 5177 for (i=0;i<n_vertices;i++) { 5178 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5179 } 5180 for (i=0;i<sub_schurs->n_subs;i++) { 5181 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5182 } 5183 #endif 5184 5185 max_size_of_constraint = 0; 5186 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5187 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5188 /* Change of basis */ 5189 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5190 if (pcbddc->use_change_of_basis) { 5191 for (i=0;i<sub_schurs->n_subs;i++) { 5192 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5193 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5194 } 5195 } 5196 } 5197 } 5198 pcbddc->local_primal_size = total_counts; 5199 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5200 5201 /* map constraints_idxs in boundary numbering */ 5202 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5203 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5204 5205 /* Create constraint matrix */ 5206 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5207 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5208 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5209 5210 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5211 /* determine if a QR strategy is needed for change of basis */ 5212 qr_needed = PETSC_FALSE; 5213 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5214 total_primal_vertices=0; 5215 pcbddc->local_primal_size_cc = 0; 5216 for (i=0;i<total_counts_cc;i++) { 5217 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5218 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5219 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5220 pcbddc->local_primal_size_cc += 1; 5221 } else if (PetscBTLookup(change_basis,i)) { 5222 for (k=0;k<constraints_n[i];k++) { 5223 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5224 } 5225 pcbddc->local_primal_size_cc += constraints_n[i]; 5226 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5227 PetscBTSet(qr_needed_idx,i); 5228 qr_needed = PETSC_TRUE; 5229 } 5230 } else { 5231 pcbddc->local_primal_size_cc += 1; 5232 } 5233 } 5234 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5235 pcbddc->n_vertices = total_primal_vertices; 5236 /* permute indices in order to have a sorted set of vertices */ 5237 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5238 5239 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5240 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5241 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5242 5243 /* nonzero structure of constraint matrix */ 5244 /* and get reference dof for local constraints */ 5245 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5246 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5247 5248 j = total_primal_vertices; 5249 total_counts = total_primal_vertices; 5250 cum = total_primal_vertices; 5251 for (i=n_vertices;i<total_counts_cc;i++) { 5252 if (!PetscBTLookup(change_basis,i)) { 5253 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5254 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5255 cum++; 5256 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5257 for (k=0;k<constraints_n[i];k++) { 5258 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5259 nnz[j+k] = size_of_constraint; 5260 } 5261 j += constraints_n[i]; 5262 } 5263 } 5264 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5265 ierr = PetscFree(nnz);CHKERRQ(ierr); 5266 5267 /* set values in constraint matrix */ 5268 for (i=0;i<total_primal_vertices;i++) { 5269 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5270 } 5271 total_counts = total_primal_vertices; 5272 for (i=n_vertices;i<total_counts_cc;i++) { 5273 if (!PetscBTLookup(change_basis,i)) { 5274 PetscInt *cols; 5275 5276 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5277 cols = constraints_idxs+constraints_idxs_ptr[i]; 5278 for (k=0;k<constraints_n[i];k++) { 5279 PetscInt row = total_counts+k; 5280 PetscScalar *vals; 5281 5282 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5283 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5284 } 5285 total_counts += constraints_n[i]; 5286 } 5287 } 5288 /* assembling */ 5289 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5290 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5291 5292 /* 5293 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5294 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5295 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5296 */ 5297 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5298 if (pcbddc->use_change_of_basis) { 5299 /* dual and primal dofs on a single cc */ 5300 PetscInt dual_dofs,primal_dofs; 5301 /* working stuff for GEQRF */ 5302 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5303 PetscBLASInt lqr_work; 5304 /* working stuff for UNGQR */ 5305 PetscScalar *gqr_work,lgqr_work_t; 5306 PetscBLASInt lgqr_work; 5307 /* working stuff for TRTRS */ 5308 PetscScalar *trs_rhs; 5309 PetscBLASInt Blas_NRHS; 5310 /* pointers for values insertion into change of basis matrix */ 5311 PetscInt *start_rows,*start_cols; 5312 PetscScalar *start_vals; 5313 /* working stuff for values insertion */ 5314 PetscBT is_primal; 5315 PetscInt *aux_primal_numbering_B; 5316 /* matrix sizes */ 5317 PetscInt global_size,local_size; 5318 /* temporary change of basis */ 5319 Mat localChangeOfBasisMatrix; 5320 /* extra space for debugging */ 5321 PetscScalar *dbg_work; 5322 5323 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5324 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5325 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5326 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5327 /* nonzeros for local mat */ 5328 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5329 if (!pcbddc->benign_change || pcbddc->fake_change) { 5330 for (i=0;i<pcis->n;i++) nnz[i]=1; 5331 } else { 5332 const PetscInt *ii; 5333 PetscInt n; 5334 PetscBool flg_row; 5335 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5336 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5337 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5338 } 5339 for (i=n_vertices;i<total_counts_cc;i++) { 5340 if (PetscBTLookup(change_basis,i)) { 5341 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5342 if (PetscBTLookup(qr_needed_idx,i)) { 5343 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5344 } else { 5345 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5346 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5347 } 5348 } 5349 } 5350 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5351 ierr = PetscFree(nnz);CHKERRQ(ierr); 5352 /* Set interior change in the matrix */ 5353 if (!pcbddc->benign_change || pcbddc->fake_change) { 5354 for (i=0;i<pcis->n;i++) { 5355 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5356 } 5357 } else { 5358 const PetscInt *ii,*jj; 5359 PetscScalar *aa; 5360 PetscInt n; 5361 PetscBool flg_row; 5362 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5363 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5364 for (i=0;i<n;i++) { 5365 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5366 } 5367 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5368 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5369 } 5370 5371 if (pcbddc->dbg_flag) { 5372 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5373 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5374 } 5375 5376 5377 /* Now we loop on the constraints which need a change of basis */ 5378 /* 5379 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5380 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5381 5382 Basic blocks of change of basis matrix T computed by 5383 5384 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5385 5386 | 1 0 ... 0 s_1/S | 5387 | 0 1 ... 0 s_2/S | 5388 | ... | 5389 | 0 ... 1 s_{n-1}/S | 5390 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5391 5392 with S = \sum_{i=1}^n s_i^2 5393 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5394 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5395 5396 - QR decomposition of constraints otherwise 5397 */ 5398 if (qr_needed) { 5399 /* space to store Q */ 5400 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5401 /* array to store scaling factors for reflectors */ 5402 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5403 /* first we issue queries for optimal work */ 5404 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5405 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5406 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5407 lqr_work = -1; 5408 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5409 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5410 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5411 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5412 lgqr_work = -1; 5413 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5414 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5415 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5416 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5417 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5418 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5419 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5420 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5421 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5422 /* array to store rhs and solution of triangular solver */ 5423 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5424 /* allocating workspace for check */ 5425 if (pcbddc->dbg_flag) { 5426 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5427 } 5428 } 5429 /* array to store whether a node is primal or not */ 5430 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5431 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5432 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5433 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5434 for (i=0;i<total_primal_vertices;i++) { 5435 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5436 } 5437 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5438 5439 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5440 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5441 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5442 if (PetscBTLookup(change_basis,total_counts)) { 5443 /* get constraint info */ 5444 primal_dofs = constraints_n[total_counts]; 5445 dual_dofs = size_of_constraint-primal_dofs; 5446 5447 if (pcbddc->dbg_flag) { 5448 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5449 } 5450 5451 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5452 5453 /* copy quadrature constraints for change of basis check */ 5454 if (pcbddc->dbg_flag) { 5455 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5456 } 5457 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5458 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5459 5460 /* compute QR decomposition of constraints */ 5461 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5462 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5463 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5464 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5465 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5466 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5467 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5468 5469 /* explictly compute R^-T */ 5470 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5471 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5472 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5473 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5474 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5475 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5476 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5477 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5478 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5479 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5480 5481 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5483 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5484 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5485 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5486 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5487 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5488 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5489 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5490 5491 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5492 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5493 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5494 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5495 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5496 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5497 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5498 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5499 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5500 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5501 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5502 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5503 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5504 5505 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5506 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5507 /* insert cols for primal dofs */ 5508 for (j=0;j<primal_dofs;j++) { 5509 start_vals = &qr_basis[j*size_of_constraint]; 5510 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5511 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5512 } 5513 /* insert cols for dual dofs */ 5514 for (j=0,k=0;j<dual_dofs;k++) { 5515 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5516 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5517 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5518 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5519 j++; 5520 } 5521 } 5522 5523 /* check change of basis */ 5524 if (pcbddc->dbg_flag) { 5525 PetscInt ii,jj; 5526 PetscBool valid_qr=PETSC_TRUE; 5527 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5529 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5531 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5532 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5533 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5534 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5535 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5536 for (jj=0;jj<size_of_constraint;jj++) { 5537 for (ii=0;ii<primal_dofs;ii++) { 5538 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5539 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5540 } 5541 } 5542 if (!valid_qr) { 5543 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5544 for (jj=0;jj<size_of_constraint;jj++) { 5545 for (ii=0;ii<primal_dofs;ii++) { 5546 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5547 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5548 } 5549 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5550 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5551 } 5552 } 5553 } 5554 } else { 5555 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5556 } 5557 } 5558 } else { /* simple transformation block */ 5559 PetscInt row,col; 5560 PetscScalar val,norm; 5561 5562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5563 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5564 for (j=0;j<size_of_constraint;j++) { 5565 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5566 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5567 if (!PetscBTLookup(is_primal,row_B)) { 5568 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5569 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5570 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5571 } else { 5572 for (k=0;k<size_of_constraint;k++) { 5573 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5574 if (row != col) { 5575 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5576 } else { 5577 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5578 } 5579 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5580 } 5581 } 5582 } 5583 if (pcbddc->dbg_flag) { 5584 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5585 } 5586 } 5587 } else { 5588 if (pcbddc->dbg_flag) { 5589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5590 } 5591 } 5592 } 5593 5594 /* free workspace */ 5595 if (qr_needed) { 5596 if (pcbddc->dbg_flag) { 5597 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5598 } 5599 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5600 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5601 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5602 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5603 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5604 } 5605 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5606 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5607 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5608 5609 /* assembling of global change of variable */ 5610 if (!pcbddc->fake_change) { 5611 Mat tmat; 5612 PetscInt bs; 5613 5614 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5615 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5616 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5617 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5618 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5619 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5620 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5621 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5622 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5623 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5624 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5625 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5626 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5627 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5628 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5629 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5630 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5631 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5632 5633 /* check */ 5634 if (pcbddc->dbg_flag) { 5635 PetscReal error; 5636 Vec x,x_change; 5637 5638 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5639 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5640 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5641 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5642 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5643 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5644 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5645 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5646 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5647 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5648 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5649 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5650 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5651 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5652 ierr = VecDestroy(&x);CHKERRQ(ierr); 5653 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5654 } 5655 /* adapt sub_schurs computed (if any) */ 5656 if (pcbddc->use_deluxe_scaling) { 5657 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5658 5659 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5660 if (sub_schurs && sub_schurs->S_Ej_all) { 5661 Mat S_new,tmat; 5662 IS is_all_N,is_V_Sall = NULL; 5663 5664 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5665 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5666 if (pcbddc->deluxe_zerorows) { 5667 ISLocalToGlobalMapping NtoSall; 5668 IS is_V; 5669 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5670 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5671 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5672 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5673 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5674 } 5675 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5676 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5677 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5678 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5679 if (pcbddc->deluxe_zerorows) { 5680 const PetscScalar *array; 5681 const PetscInt *idxs_V,*idxs_all; 5682 PetscInt i,n_V; 5683 5684 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5685 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5686 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5687 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5688 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5689 for (i=0;i<n_V;i++) { 5690 PetscScalar val; 5691 PetscInt idx; 5692 5693 idx = idxs_V[i]; 5694 val = array[idxs_all[idxs_V[i]]]; 5695 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5696 } 5697 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5700 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5701 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5702 } 5703 sub_schurs->S_Ej_all = S_new; 5704 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5705 if (sub_schurs->sum_S_Ej_all) { 5706 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5707 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5708 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5709 if (pcbddc->deluxe_zerorows) { 5710 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5711 } 5712 sub_schurs->sum_S_Ej_all = S_new; 5713 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5714 } 5715 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5716 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5717 } 5718 /* destroy any change of basis context in sub_schurs */ 5719 if (sub_schurs && sub_schurs->change) { 5720 PetscInt i; 5721 5722 for (i=0;i<sub_schurs->n_subs;i++) { 5723 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5724 } 5725 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5726 } 5727 } 5728 if (pcbddc->switch_static) { /* need to save the local change */ 5729 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5730 } else { 5731 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5732 } 5733 /* determine if any process has changed the pressures locally */ 5734 pcbddc->change_interior = pcbddc->benign_have_null; 5735 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5736 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5737 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5738 pcbddc->use_qr_single = qr_needed; 5739 } 5740 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5741 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5742 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5743 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5744 } else { 5745 Mat benign_global = NULL; 5746 if (pcbddc->benign_have_null) { 5747 Mat tmat; 5748 5749 pcbddc->change_interior = PETSC_TRUE; 5750 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5751 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5752 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5753 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5754 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5755 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5756 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5757 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5758 if (pcbddc->benign_change) { 5759 Mat M; 5760 5761 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5762 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5763 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5764 ierr = MatDestroy(&M);CHKERRQ(ierr); 5765 } else { 5766 Mat eye; 5767 PetscScalar *array; 5768 5769 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5770 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5771 for (i=0;i<pcis->n;i++) { 5772 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5773 } 5774 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5775 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5776 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5777 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5778 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5779 } 5780 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5781 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5782 } 5783 if (pcbddc->user_ChangeOfBasisMatrix) { 5784 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5785 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5786 } else if (pcbddc->benign_have_null) { 5787 pcbddc->ChangeOfBasisMatrix = benign_global; 5788 } 5789 } 5790 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5791 IS is_global; 5792 const PetscInt *gidxs; 5793 5794 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5795 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5796 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5797 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5798 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5799 } 5800 } 5801 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5802 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5803 } 5804 5805 if (!pcbddc->fake_change) { 5806 /* add pressure dofs to set of primal nodes for numbering purposes */ 5807 for (i=0;i<pcbddc->benign_n;i++) { 5808 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5809 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5810 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5811 pcbddc->local_primal_size_cc++; 5812 pcbddc->local_primal_size++; 5813 } 5814 5815 /* check if a new primal space has been introduced (also take into account benign trick) */ 5816 pcbddc->new_primal_space_local = PETSC_TRUE; 5817 if (olocal_primal_size == pcbddc->local_primal_size) { 5818 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5819 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5820 if (!pcbddc->new_primal_space_local) { 5821 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5822 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5823 } 5824 } 5825 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5826 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5827 } 5828 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5829 5830 /* flush dbg viewer */ 5831 if (pcbddc->dbg_flag) { 5832 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5833 } 5834 5835 /* free workspace */ 5836 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5837 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5838 if (!pcbddc->adaptive_selection) { 5839 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5840 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5841 } else { 5842 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5843 pcbddc->adaptive_constraints_idxs_ptr, 5844 pcbddc->adaptive_constraints_data_ptr, 5845 pcbddc->adaptive_constraints_idxs, 5846 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5847 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5848 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5849 } 5850 PetscFunctionReturn(0); 5851 } 5852 5853 #undef __FUNCT__ 5854 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5855 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5856 { 5857 ISLocalToGlobalMapping map; 5858 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5859 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5860 PetscInt ierr,i,N; 5861 5862 PetscFunctionBegin; 5863 if (pcbddc->recompute_topography) { 5864 /* Reset previously computed graph */ 5865 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5866 /* Init local Graph struct */ 5867 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5868 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5869 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5870 5871 /* Check validity of the csr graph passed in by the user */ 5872 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5873 5874 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5875 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5876 PetscInt *xadj,*adjncy; 5877 PetscInt nvtxs; 5878 PetscBool flg_row=PETSC_FALSE; 5879 5880 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5881 if (flg_row) { 5882 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5883 pcbddc->computed_rowadj = PETSC_TRUE; 5884 } 5885 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5886 } 5887 if (pcbddc->dbg_flag) { 5888 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5889 } 5890 5891 /* Setup of Graph */ 5892 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5893 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5894 5895 /* attach info on disconnected subdomains if present */ 5896 if (pcbddc->n_local_subs) { 5897 PetscInt *local_subs; 5898 5899 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5900 for (i=0;i<pcbddc->n_local_subs;i++) { 5901 const PetscInt *idxs; 5902 PetscInt nl,j; 5903 5904 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5905 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5906 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5907 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5908 } 5909 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5910 pcbddc->mat_graph->local_subs = local_subs; 5911 } 5912 pcbddc->graphanalyzed = PETSC_FALSE; 5913 } 5914 5915 if (!pcbddc->graphanalyzed) { 5916 /* Graph's connected components analysis */ 5917 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5918 pcbddc->graphanalyzed = PETSC_TRUE; 5919 } 5920 PetscFunctionReturn(0); 5921 } 5922 5923 #undef __FUNCT__ 5924 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5925 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5926 { 5927 PetscInt i,j; 5928 PetscScalar *alphas; 5929 PetscErrorCode ierr; 5930 5931 PetscFunctionBegin; 5932 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5933 for (i=0;i<n;i++) { 5934 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5935 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5936 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5937 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5938 } 5939 ierr = PetscFree(alphas);CHKERRQ(ierr); 5940 PetscFunctionReturn(0); 5941 } 5942 5943 #undef __FUNCT__ 5944 #define __FUNCT__ "MatISGetSubassemblingPattern" 5945 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5946 { 5947 Mat A; 5948 PetscInt n_neighs,*neighs,*n_shared,**shared; 5949 PetscMPIInt size,rank,color; 5950 PetscInt *xadj,*adjncy; 5951 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5952 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5953 PetscInt void_procs,*procs_candidates = NULL; 5954 PetscInt xadj_count, *count; 5955 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5956 PetscSubcomm psubcomm; 5957 MPI_Comm subcomm; 5958 PetscErrorCode ierr; 5959 5960 PetscFunctionBegin; 5961 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5962 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5963 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5964 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5965 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5966 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5967 5968 if (have_void) *have_void = PETSC_FALSE; 5969 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5970 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5971 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5972 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5973 im_active = !!(n); 5974 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5975 void_procs = size - active_procs; 5976 /* get ranks of of non-active processes in mat communicator */ 5977 if (void_procs) { 5978 PetscInt ncand; 5979 5980 if (have_void) *have_void = PETSC_TRUE; 5981 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5982 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5983 for (i=0,ncand=0;i<size;i++) { 5984 if (!procs_candidates[i]) { 5985 procs_candidates[ncand++] = i; 5986 } 5987 } 5988 /* force n_subdomains to be not greater that the number of non-active processes */ 5989 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5990 } 5991 5992 /* number of subdomains requested greater than active processes -> just shift the matrix 5993 number of subdomains requested 1 -> send to master or first candidate in voids */ 5994 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5995 PetscInt issize,isidx,dest; 5996 if (*n_subdomains == 1) dest = 0; 5997 else dest = rank; 5998 if (im_active) { 5999 issize = 1; 6000 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6001 isidx = procs_candidates[dest]; 6002 } else { 6003 isidx = dest; 6004 } 6005 } else { 6006 issize = 0; 6007 isidx = -1; 6008 } 6009 *n_subdomains = active_procs; 6010 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6011 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6012 PetscFunctionReturn(0); 6013 } 6014 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6015 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6016 threshold = PetscMax(threshold,2); 6017 6018 /* Get info on mapping */ 6019 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6020 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6021 6022 /* build local CSR graph of subdomains' connectivity */ 6023 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6024 xadj[0] = 0; 6025 xadj[1] = PetscMax(n_neighs-1,0); 6026 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6027 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6028 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6029 for (i=1;i<n_neighs;i++) 6030 for (j=0;j<n_shared[i];j++) 6031 count[shared[i][j]] += 1; 6032 6033 xadj_count = 0; 6034 for (i=1;i<n_neighs;i++) { 6035 for (j=0;j<n_shared[i];j++) { 6036 if (count[shared[i][j]] < threshold) { 6037 adjncy[xadj_count] = neighs[i]; 6038 adjncy_wgt[xadj_count] = n_shared[i]; 6039 xadj_count++; 6040 break; 6041 } 6042 } 6043 } 6044 xadj[1] = xadj_count; 6045 ierr = PetscFree(count);CHKERRQ(ierr); 6046 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6047 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6048 6049 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6050 6051 /* Restrict work on active processes only */ 6052 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6053 if (void_procs) { 6054 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6055 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6056 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6057 subcomm = PetscSubcommChild(psubcomm); 6058 } else { 6059 psubcomm = NULL; 6060 subcomm = PetscObjectComm((PetscObject)mat); 6061 } 6062 6063 v_wgt = NULL; 6064 if (!color) { 6065 ierr = PetscFree(xadj);CHKERRQ(ierr); 6066 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6067 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6068 } else { 6069 Mat subdomain_adj; 6070 IS new_ranks,new_ranks_contig; 6071 MatPartitioning partitioner; 6072 PetscInt rstart=0,rend=0; 6073 PetscInt *is_indices,*oldranks; 6074 PetscMPIInt size; 6075 PetscBool aggregate; 6076 6077 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6078 if (void_procs) { 6079 PetscInt prank = rank; 6080 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6081 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6082 for (i=0;i<xadj[1];i++) { 6083 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6084 } 6085 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6086 } else { 6087 oldranks = NULL; 6088 } 6089 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6090 if (aggregate) { /* TODO: all this part could be made more efficient */ 6091 PetscInt lrows,row,ncols,*cols; 6092 PetscMPIInt nrank; 6093 PetscScalar *vals; 6094 6095 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6096 lrows = 0; 6097 if (nrank<redprocs) { 6098 lrows = size/redprocs; 6099 if (nrank<size%redprocs) lrows++; 6100 } 6101 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6102 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6103 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6104 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6105 row = nrank; 6106 ncols = xadj[1]-xadj[0]; 6107 cols = adjncy; 6108 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6109 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6110 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6111 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6112 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6113 ierr = PetscFree(xadj);CHKERRQ(ierr); 6114 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6115 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6116 ierr = PetscFree(vals);CHKERRQ(ierr); 6117 if (use_vwgt) { 6118 Vec v; 6119 const PetscScalar *array; 6120 PetscInt nl; 6121 6122 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6123 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6124 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6125 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6126 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6127 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6128 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6129 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6130 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6131 ierr = VecDestroy(&v);CHKERRQ(ierr); 6132 } 6133 } else { 6134 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6135 if (use_vwgt) { 6136 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6137 v_wgt[0] = local_size; 6138 } 6139 } 6140 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6141 6142 /* Partition */ 6143 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6144 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6145 if (v_wgt) { 6146 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6147 } 6148 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6149 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6150 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6151 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6152 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6153 6154 /* renumber new_ranks to avoid "holes" in new set of processors */ 6155 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6156 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6157 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6158 if (!aggregate) { 6159 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6160 #if defined(PETSC_USE_DEBUG) 6161 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6162 #endif 6163 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6164 } else if (oldranks) { 6165 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6166 } else { 6167 ranks_send_to_idx[0] = is_indices[0]; 6168 } 6169 } else { 6170 PetscInt idxs[1]; 6171 PetscMPIInt tag; 6172 MPI_Request *reqs; 6173 6174 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6175 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6176 for (i=rstart;i<rend;i++) { 6177 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6178 } 6179 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6180 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6181 ierr = PetscFree(reqs);CHKERRQ(ierr); 6182 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6183 #if defined(PETSC_USE_DEBUG) 6184 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6185 #endif 6186 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6187 } else if (oldranks) { 6188 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6189 } else { 6190 ranks_send_to_idx[0] = idxs[0]; 6191 } 6192 } 6193 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6194 /* clean up */ 6195 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6196 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6197 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6198 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6199 } 6200 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6201 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6202 6203 /* assemble parallel IS for sends */ 6204 i = 1; 6205 if (!color) i=0; 6206 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6207 PetscFunctionReturn(0); 6208 } 6209 6210 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6211 6212 #undef __FUNCT__ 6213 #define __FUNCT__ "MatISSubassemble" 6214 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6215 { 6216 Mat local_mat; 6217 IS is_sends_internal; 6218 PetscInt rows,cols,new_local_rows; 6219 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6220 PetscBool ismatis,isdense,newisdense,destroy_mat; 6221 ISLocalToGlobalMapping l2gmap; 6222 PetscInt* l2gmap_indices; 6223 const PetscInt* is_indices; 6224 MatType new_local_type; 6225 /* buffers */ 6226 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6227 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6228 PetscInt *recv_buffer_idxs_local; 6229 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6230 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6231 /* MPI */ 6232 MPI_Comm comm,comm_n; 6233 PetscSubcomm subcomm; 6234 PetscMPIInt n_sends,n_recvs,commsize; 6235 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6236 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6237 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6238 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6239 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6240 PetscErrorCode ierr; 6241 6242 PetscFunctionBegin; 6243 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6244 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6245 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6246 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6247 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6248 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6249 PetscValidLogicalCollectiveBool(mat,reuse,6); 6250 PetscValidLogicalCollectiveInt(mat,nis,8); 6251 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6252 if (nvecs) { 6253 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6254 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6255 } 6256 /* further checks */ 6257 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6258 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6259 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6260 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6261 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6262 if (reuse && *mat_n) { 6263 PetscInt mrows,mcols,mnrows,mncols; 6264 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6265 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6266 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6267 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6268 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6269 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6270 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6271 } 6272 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6273 PetscValidLogicalCollectiveInt(mat,bs,0); 6274 6275 /* prepare IS for sending if not provided */ 6276 if (!is_sends) { 6277 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6278 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6279 } else { 6280 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6281 is_sends_internal = is_sends; 6282 } 6283 6284 /* get comm */ 6285 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6286 6287 /* compute number of sends */ 6288 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6289 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6290 6291 /* compute number of receives */ 6292 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6293 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6294 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6295 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6296 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6297 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6298 ierr = PetscFree(iflags);CHKERRQ(ierr); 6299 6300 /* restrict comm if requested */ 6301 subcomm = 0; 6302 destroy_mat = PETSC_FALSE; 6303 if (restrict_comm) { 6304 PetscMPIInt color,subcommsize; 6305 6306 color = 0; 6307 if (restrict_full) { 6308 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6309 } else { 6310 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6311 } 6312 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6313 subcommsize = commsize - subcommsize; 6314 /* check if reuse has been requested */ 6315 if (reuse) { 6316 if (*mat_n) { 6317 PetscMPIInt subcommsize2; 6318 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6319 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6320 comm_n = PetscObjectComm((PetscObject)*mat_n); 6321 } else { 6322 comm_n = PETSC_COMM_SELF; 6323 } 6324 } else { /* MAT_INITIAL_MATRIX */ 6325 PetscMPIInt rank; 6326 6327 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6328 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6329 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6330 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6331 comm_n = PetscSubcommChild(subcomm); 6332 } 6333 /* flag to destroy *mat_n if not significative */ 6334 if (color) destroy_mat = PETSC_TRUE; 6335 } else { 6336 comm_n = comm; 6337 } 6338 6339 /* prepare send/receive buffers */ 6340 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6341 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6342 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6343 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6344 if (nis) { 6345 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6346 } 6347 6348 /* Get data from local matrices */ 6349 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6350 /* TODO: See below some guidelines on how to prepare the local buffers */ 6351 /* 6352 send_buffer_vals should contain the raw values of the local matrix 6353 send_buffer_idxs should contain: 6354 - MatType_PRIVATE type 6355 - PetscInt size_of_l2gmap 6356 - PetscInt global_row_indices[size_of_l2gmap] 6357 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6358 */ 6359 else { 6360 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6361 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6362 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6363 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6364 send_buffer_idxs[1] = i; 6365 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6366 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6367 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6368 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6369 for (i=0;i<n_sends;i++) { 6370 ilengths_vals[is_indices[i]] = len*len; 6371 ilengths_idxs[is_indices[i]] = len+2; 6372 } 6373 } 6374 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6375 /* additional is (if any) */ 6376 if (nis) { 6377 PetscMPIInt psum; 6378 PetscInt j; 6379 for (j=0,psum=0;j<nis;j++) { 6380 PetscInt plen; 6381 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6382 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6383 psum += len+1; /* indices + lenght */ 6384 } 6385 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6386 for (j=0,psum=0;j<nis;j++) { 6387 PetscInt plen; 6388 const PetscInt *is_array_idxs; 6389 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6390 send_buffer_idxs_is[psum] = plen; 6391 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6392 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6393 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6394 psum += plen+1; /* indices + lenght */ 6395 } 6396 for (i=0;i<n_sends;i++) { 6397 ilengths_idxs_is[is_indices[i]] = psum; 6398 } 6399 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6400 } 6401 6402 buf_size_idxs = 0; 6403 buf_size_vals = 0; 6404 buf_size_idxs_is = 0; 6405 buf_size_vecs = 0; 6406 for (i=0;i<n_recvs;i++) { 6407 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6408 buf_size_vals += (PetscInt)olengths_vals[i]; 6409 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6410 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6411 } 6412 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6413 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6414 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6415 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6416 6417 /* get new tags for clean communications */ 6418 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6419 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6420 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6421 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6422 6423 /* allocate for requests */ 6424 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6425 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6426 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6427 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6428 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6429 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6430 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6431 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6432 6433 /* communications */ 6434 ptr_idxs = recv_buffer_idxs; 6435 ptr_vals = recv_buffer_vals; 6436 ptr_idxs_is = recv_buffer_idxs_is; 6437 ptr_vecs = recv_buffer_vecs; 6438 for (i=0;i<n_recvs;i++) { 6439 source_dest = onodes[i]; 6440 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6441 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6442 ptr_idxs += olengths_idxs[i]; 6443 ptr_vals += olengths_vals[i]; 6444 if (nis) { 6445 source_dest = onodes_is[i]; 6446 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6447 ptr_idxs_is += olengths_idxs_is[i]; 6448 } 6449 if (nvecs) { 6450 source_dest = onodes[i]; 6451 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6452 ptr_vecs += olengths_idxs[i]-2; 6453 } 6454 } 6455 for (i=0;i<n_sends;i++) { 6456 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6457 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6458 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6459 if (nis) { 6460 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6461 } 6462 if (nvecs) { 6463 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6464 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6465 } 6466 } 6467 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6468 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6469 6470 /* assemble new l2g map */ 6471 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6472 ptr_idxs = recv_buffer_idxs; 6473 new_local_rows = 0; 6474 for (i=0;i<n_recvs;i++) { 6475 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6476 ptr_idxs += olengths_idxs[i]; 6477 } 6478 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6479 ptr_idxs = recv_buffer_idxs; 6480 new_local_rows = 0; 6481 for (i=0;i<n_recvs;i++) { 6482 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6483 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6484 ptr_idxs += olengths_idxs[i]; 6485 } 6486 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6487 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6488 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6489 6490 /* infer new local matrix type from received local matrices type */ 6491 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6492 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6493 if (n_recvs) { 6494 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6495 ptr_idxs = recv_buffer_idxs; 6496 for (i=0;i<n_recvs;i++) { 6497 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6498 new_local_type_private = MATAIJ_PRIVATE; 6499 break; 6500 } 6501 ptr_idxs += olengths_idxs[i]; 6502 } 6503 switch (new_local_type_private) { 6504 case MATDENSE_PRIVATE: 6505 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6506 new_local_type = MATSEQAIJ; 6507 bs = 1; 6508 } else { /* if I receive only 1 dense matrix */ 6509 new_local_type = MATSEQDENSE; 6510 bs = 1; 6511 } 6512 break; 6513 case MATAIJ_PRIVATE: 6514 new_local_type = MATSEQAIJ; 6515 bs = 1; 6516 break; 6517 case MATBAIJ_PRIVATE: 6518 new_local_type = MATSEQBAIJ; 6519 break; 6520 case MATSBAIJ_PRIVATE: 6521 new_local_type = MATSEQSBAIJ; 6522 break; 6523 default: 6524 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6525 break; 6526 } 6527 } else { /* by default, new_local_type is seqdense */ 6528 new_local_type = MATSEQDENSE; 6529 bs = 1; 6530 } 6531 6532 /* create MATIS object if needed */ 6533 if (!reuse) { 6534 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6535 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6536 } else { 6537 /* it also destroys the local matrices */ 6538 if (*mat_n) { 6539 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6540 } else { /* this is a fake object */ 6541 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6542 } 6543 } 6544 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6545 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6546 6547 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6548 6549 /* Global to local map of received indices */ 6550 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6551 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6552 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6553 6554 /* restore attributes -> type of incoming data and its size */ 6555 buf_size_idxs = 0; 6556 for (i=0;i<n_recvs;i++) { 6557 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6558 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6559 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6560 } 6561 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6562 6563 /* set preallocation */ 6564 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6565 if (!newisdense) { 6566 PetscInt *new_local_nnz=0; 6567 6568 ptr_idxs = recv_buffer_idxs_local; 6569 if (n_recvs) { 6570 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6571 } 6572 for (i=0;i<n_recvs;i++) { 6573 PetscInt j; 6574 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6575 for (j=0;j<*(ptr_idxs+1);j++) { 6576 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6577 } 6578 } else { 6579 /* TODO */ 6580 } 6581 ptr_idxs += olengths_idxs[i]; 6582 } 6583 if (new_local_nnz) { 6584 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6585 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6586 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6587 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6588 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6589 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6590 } else { 6591 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6592 } 6593 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6594 } else { 6595 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6596 } 6597 6598 /* set values */ 6599 ptr_vals = recv_buffer_vals; 6600 ptr_idxs = recv_buffer_idxs_local; 6601 for (i=0;i<n_recvs;i++) { 6602 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6603 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6604 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6605 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6606 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6607 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6608 } else { 6609 /* TODO */ 6610 } 6611 ptr_idxs += olengths_idxs[i]; 6612 ptr_vals += olengths_vals[i]; 6613 } 6614 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6615 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6616 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6617 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6618 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6619 6620 #if 0 6621 if (!restrict_comm) { /* check */ 6622 Vec lvec,rvec; 6623 PetscReal infty_error; 6624 6625 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6626 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6627 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6628 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6629 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6630 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6631 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6632 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6633 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6634 } 6635 #endif 6636 6637 /* assemble new additional is (if any) */ 6638 if (nis) { 6639 PetscInt **temp_idxs,*count_is,j,psum; 6640 6641 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6642 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6643 ptr_idxs = recv_buffer_idxs_is; 6644 psum = 0; 6645 for (i=0;i<n_recvs;i++) { 6646 for (j=0;j<nis;j++) { 6647 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6648 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6649 psum += plen; 6650 ptr_idxs += plen+1; /* shift pointer to received data */ 6651 } 6652 } 6653 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6654 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6655 for (i=1;i<nis;i++) { 6656 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6657 } 6658 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6659 ptr_idxs = recv_buffer_idxs_is; 6660 for (i=0;i<n_recvs;i++) { 6661 for (j=0;j<nis;j++) { 6662 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6663 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6664 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6665 ptr_idxs += plen+1; /* shift pointer to received data */ 6666 } 6667 } 6668 for (i=0;i<nis;i++) { 6669 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6670 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6671 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6672 } 6673 ierr = PetscFree(count_is);CHKERRQ(ierr); 6674 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6675 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6676 } 6677 /* free workspace */ 6678 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6679 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6680 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6681 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6682 if (isdense) { 6683 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6684 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6685 } else { 6686 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6687 } 6688 if (nis) { 6689 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6690 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6691 } 6692 6693 if (nvecs) { 6694 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6695 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6696 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6697 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6698 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6699 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6700 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6701 /* set values */ 6702 ptr_vals = recv_buffer_vecs; 6703 ptr_idxs = recv_buffer_idxs_local; 6704 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6705 for (i=0;i<n_recvs;i++) { 6706 PetscInt j; 6707 for (j=0;j<*(ptr_idxs+1);j++) { 6708 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6709 } 6710 ptr_idxs += olengths_idxs[i]; 6711 ptr_vals += olengths_idxs[i]-2; 6712 } 6713 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6714 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6715 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6716 } 6717 6718 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6719 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6720 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6721 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6722 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6723 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6724 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6725 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6726 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6727 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6728 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6729 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6730 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6731 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6732 ierr = PetscFree(onodes);CHKERRQ(ierr); 6733 if (nis) { 6734 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6735 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6736 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6737 } 6738 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6739 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6740 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6741 for (i=0;i<nis;i++) { 6742 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6743 } 6744 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6745 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6746 } 6747 *mat_n = NULL; 6748 } 6749 PetscFunctionReturn(0); 6750 } 6751 6752 /* temporary hack into ksp private data structure */ 6753 #include <petsc/private/kspimpl.h> 6754 6755 #undef __FUNCT__ 6756 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6757 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6758 { 6759 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6760 PC_IS *pcis = (PC_IS*)pc->data; 6761 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6762 Mat coarsedivudotp = NULL; 6763 MatNullSpace CoarseNullSpace = NULL; 6764 ISLocalToGlobalMapping coarse_islg; 6765 IS coarse_is,*isarray; 6766 PetscInt i,im_active=-1,active_procs=-1; 6767 PetscInt nis,nisdofs,nisneu,nisvert; 6768 PC pc_temp; 6769 PCType coarse_pc_type; 6770 KSPType coarse_ksp_type; 6771 PetscBool multilevel_requested,multilevel_allowed; 6772 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6773 Mat t_coarse_mat_is; 6774 PetscInt ncoarse; 6775 PetscBool compute_vecs = PETSC_FALSE; 6776 PetscScalar *array; 6777 MatReuse coarse_mat_reuse; 6778 PetscBool restr, full_restr, have_void; 6779 PetscErrorCode ierr; 6780 6781 PetscFunctionBegin; 6782 /* Assign global numbering to coarse dofs */ 6783 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6784 PetscInt ocoarse_size; 6785 compute_vecs = PETSC_TRUE; 6786 ocoarse_size = pcbddc->coarse_size; 6787 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6788 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6789 /* see if we can avoid some work */ 6790 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6791 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6792 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6793 PC pc; 6794 PetscBool isbddc; 6795 6796 /* temporary workaround since PCBDDC does not have a reset method so far */ 6797 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6798 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6799 if (isbddc) { 6800 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6801 } else { 6802 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6803 } 6804 coarse_reuse = PETSC_FALSE; 6805 } else { /* we can safely reuse already computed coarse matrix */ 6806 coarse_reuse = PETSC_TRUE; 6807 } 6808 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6809 coarse_reuse = PETSC_FALSE; 6810 } 6811 /* reset any subassembling information */ 6812 if (!coarse_reuse || pcbddc->recompute_topography) { 6813 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6814 } 6815 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6816 coarse_reuse = PETSC_TRUE; 6817 } 6818 /* assemble coarse matrix */ 6819 if (coarse_reuse && pcbddc->coarse_ksp) { 6820 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6821 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6822 coarse_mat_reuse = MAT_REUSE_MATRIX; 6823 } else { 6824 coarse_mat = NULL; 6825 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6826 } 6827 6828 /* creates temporary l2gmap and IS for coarse indexes */ 6829 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6830 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6831 6832 /* creates temporary MATIS object for coarse matrix */ 6833 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6834 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6835 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6836 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6837 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6838 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6839 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6840 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6841 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6842 6843 /* count "active" (i.e. with positive local size) and "void" processes */ 6844 im_active = !!(pcis->n); 6845 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6846 6847 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6848 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6849 /* full_restr : just use the receivers from the subassembling pattern */ 6850 coarse_mat_is = NULL; 6851 multilevel_allowed = PETSC_FALSE; 6852 multilevel_requested = PETSC_FALSE; 6853 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6854 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6855 if (multilevel_requested) { 6856 ncoarse = active_procs/pcbddc->coarsening_ratio; 6857 restr = PETSC_FALSE; 6858 full_restr = PETSC_FALSE; 6859 } else { 6860 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6861 restr = PETSC_TRUE; 6862 full_restr = PETSC_TRUE; 6863 } 6864 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6865 ncoarse = PetscMax(1,ncoarse); 6866 if (!pcbddc->coarse_subassembling) { 6867 if (pcbddc->coarsening_ratio > 1) { 6868 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6869 } else { 6870 PetscMPIInt size,rank; 6871 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6872 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6873 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6874 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6875 } 6876 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6877 PetscInt psum; 6878 PetscMPIInt size; 6879 if (pcbddc->coarse_ksp) psum = 1; 6880 else psum = 0; 6881 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6882 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6883 if (ncoarse < size) have_void = PETSC_TRUE; 6884 } 6885 /* determine if we can go multilevel */ 6886 if (multilevel_requested) { 6887 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6888 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6889 } 6890 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6891 6892 /* dump subassembling pattern */ 6893 if (pcbddc->dbg_flag && multilevel_allowed) { 6894 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6895 } 6896 6897 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6898 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6899 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6900 const PetscInt *idxs; 6901 ISLocalToGlobalMapping tmap; 6902 6903 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6904 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6905 /* allocate space for temporary storage */ 6906 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6907 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6908 /* allocate for IS array */ 6909 nisdofs = pcbddc->n_ISForDofsLocal; 6910 nisneu = !!pcbddc->NeumannBoundariesLocal; 6911 nisvert = 0; /* nisvert is not used */ 6912 nis = nisdofs + nisneu + nisvert; 6913 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6914 /* dofs splitting */ 6915 for (i=0;i<nisdofs;i++) { 6916 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6917 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6918 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6919 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6920 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6921 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6922 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6923 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6924 } 6925 /* neumann boundaries */ 6926 if (pcbddc->NeumannBoundariesLocal) { 6927 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6928 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6929 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6930 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6931 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6932 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6933 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6934 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6935 } 6936 /* free memory */ 6937 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6938 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6939 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6940 } else { 6941 nis = 0; 6942 nisdofs = 0; 6943 nisneu = 0; 6944 nisvert = 0; 6945 isarray = NULL; 6946 } 6947 /* destroy no longer needed map */ 6948 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6949 6950 /* subassemble */ 6951 if (multilevel_allowed) { 6952 Vec vp[1]; 6953 PetscInt nvecs = 0; 6954 PetscBool reuse,reuser; 6955 6956 if (coarse_mat) reuse = PETSC_TRUE; 6957 else reuse = PETSC_FALSE; 6958 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6959 vp[0] = NULL; 6960 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6961 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6962 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6963 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6964 nvecs = 1; 6965 6966 if (pcbddc->divudotp) { 6967 Mat B,loc_divudotp; 6968 Vec v,p; 6969 IS dummy; 6970 PetscInt np; 6971 6972 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6973 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6974 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6975 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6976 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6977 ierr = VecSet(p,1.);CHKERRQ(ierr); 6978 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6979 ierr = VecDestroy(&p);CHKERRQ(ierr); 6980 ierr = MatDestroy(&B);CHKERRQ(ierr); 6981 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6982 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6983 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6984 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6985 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6986 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6987 ierr = VecDestroy(&v);CHKERRQ(ierr); 6988 } 6989 } 6990 if (reuser) { 6991 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6992 } else { 6993 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6994 } 6995 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6996 PetscScalar *arraym,*arrayv; 6997 PetscInt nl; 6998 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6999 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7000 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7001 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7002 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7003 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7004 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7005 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7006 } else { 7007 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7008 } 7009 } else { 7010 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 7011 } 7012 if (coarse_mat_is || coarse_mat) { 7013 PetscMPIInt size; 7014 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7015 if (!multilevel_allowed) { 7016 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7017 } else { 7018 Mat A; 7019 7020 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7021 if (coarse_mat_is) { 7022 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7023 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7024 coarse_mat = coarse_mat_is; 7025 } 7026 /* be sure we don't have MatSeqDENSE as local mat */ 7027 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7028 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7029 } 7030 } 7031 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7032 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7033 7034 /* create local to global scatters for coarse problem */ 7035 if (compute_vecs) { 7036 PetscInt lrows; 7037 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7038 if (coarse_mat) { 7039 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7040 } else { 7041 lrows = 0; 7042 } 7043 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7044 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7045 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7046 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7047 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7048 } 7049 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7050 7051 /* set defaults for coarse KSP and PC */ 7052 if (multilevel_allowed) { 7053 coarse_ksp_type = KSPRICHARDSON; 7054 coarse_pc_type = PCBDDC; 7055 } else { 7056 coarse_ksp_type = KSPPREONLY; 7057 coarse_pc_type = PCREDUNDANT; 7058 } 7059 7060 /* print some info if requested */ 7061 if (pcbddc->dbg_flag) { 7062 if (!multilevel_allowed) { 7063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7064 if (multilevel_requested) { 7065 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7066 } else if (pcbddc->max_levels) { 7067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7068 } 7069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7070 } 7071 } 7072 7073 /* create the coarse KSP object only once with defaults */ 7074 if (coarse_mat) { 7075 PetscViewer dbg_viewer = NULL; 7076 if (pcbddc->dbg_flag) { 7077 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7078 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7079 } 7080 if (!pcbddc->coarse_ksp) { 7081 char prefix[256],str_level[16]; 7082 size_t len; 7083 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7084 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7085 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7086 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7087 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7088 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7089 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7090 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7091 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7092 /* prefix */ 7093 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7094 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7095 if (!pcbddc->current_level) { 7096 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7097 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7098 } else { 7099 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7100 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7101 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7102 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7103 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7104 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7105 } 7106 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7107 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7108 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7109 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7110 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7111 /* allow user customization */ 7112 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7113 } 7114 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7115 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7116 if (nisdofs) { 7117 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7118 for (i=0;i<nisdofs;i++) { 7119 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7120 } 7121 } 7122 if (nisneu) { 7123 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7124 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7125 } 7126 if (nisvert) { 7127 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7128 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7129 } 7130 7131 /* get some info after set from options */ 7132 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7133 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7134 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7135 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7136 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7137 isbddc = PETSC_FALSE; 7138 } 7139 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7140 if (isredundant) { 7141 KSP inner_ksp; 7142 PC inner_pc; 7143 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7144 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7145 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7146 } 7147 7148 /* parameters which miss an API */ 7149 if (isbddc) { 7150 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7151 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7152 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7153 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7154 if (pcbddc_coarse->benign_saddle_point) { 7155 Mat coarsedivudotp_is; 7156 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7157 IS row,col; 7158 const PetscInt *gidxs; 7159 PetscInt n,st,M,N; 7160 7161 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7162 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7163 st = st-n; 7164 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7165 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7166 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7167 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7168 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7169 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7170 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7171 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7172 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7173 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7174 ierr = ISDestroy(&row);CHKERRQ(ierr); 7175 ierr = ISDestroy(&col);CHKERRQ(ierr); 7176 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7177 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7178 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7179 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7180 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7181 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7182 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7183 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7184 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7185 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7186 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7187 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7188 } 7189 } 7190 7191 /* propagate symmetry info of coarse matrix */ 7192 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7193 if (pc->pmat->symmetric_set) { 7194 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7195 } 7196 if (pc->pmat->hermitian_set) { 7197 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7198 } 7199 if (pc->pmat->spd_set) { 7200 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7201 } 7202 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7203 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7204 } 7205 /* set operators */ 7206 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7207 if (pcbddc->dbg_flag) { 7208 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7209 } 7210 } 7211 ierr = PetscFree(isarray);CHKERRQ(ierr); 7212 #if 0 7213 { 7214 PetscViewer viewer; 7215 char filename[256]; 7216 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7217 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7218 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7219 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7220 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7221 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7222 } 7223 #endif 7224 7225 if (pcbddc->coarse_ksp) { 7226 Vec crhs,csol; 7227 7228 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7229 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7230 if (!csol) { 7231 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7232 } 7233 if (!crhs) { 7234 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7235 } 7236 } 7237 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7238 7239 /* compute null space for coarse solver if the benign trick has been requested */ 7240 if (pcbddc->benign_null) { 7241 7242 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7243 for (i=0;i<pcbddc->benign_n;i++) { 7244 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7245 } 7246 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7247 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7248 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7249 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7250 if (coarse_mat) { 7251 Vec nullv; 7252 PetscScalar *array,*array2; 7253 PetscInt nl; 7254 7255 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7256 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7257 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7258 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7259 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7260 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7261 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7262 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7263 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7264 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7265 } 7266 } 7267 7268 if (pcbddc->coarse_ksp) { 7269 PetscBool ispreonly; 7270 7271 if (CoarseNullSpace) { 7272 PetscBool isnull; 7273 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7274 if (isnull) { 7275 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7276 } 7277 /* TODO: add local nullspaces (if any) */ 7278 } 7279 /* setup coarse ksp */ 7280 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7281 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7282 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7283 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7284 KSP check_ksp; 7285 KSPType check_ksp_type; 7286 PC check_pc; 7287 Vec check_vec,coarse_vec; 7288 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7289 PetscInt its; 7290 PetscBool compute_eigs; 7291 PetscReal *eigs_r,*eigs_c; 7292 PetscInt neigs; 7293 const char *prefix; 7294 7295 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7296 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7297 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7298 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7299 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7300 /* prevent from setup unneeded object */ 7301 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7302 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7303 if (ispreonly) { 7304 check_ksp_type = KSPPREONLY; 7305 compute_eigs = PETSC_FALSE; 7306 } else { 7307 check_ksp_type = KSPGMRES; 7308 compute_eigs = PETSC_TRUE; 7309 } 7310 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7311 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7312 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7313 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7314 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7315 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7316 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7317 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7318 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7319 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7320 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7321 /* create random vec */ 7322 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7323 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7324 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7325 /* solve coarse problem */ 7326 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7327 /* set eigenvalue estimation if preonly has not been requested */ 7328 if (compute_eigs) { 7329 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7330 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7331 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7332 if (neigs) { 7333 lambda_max = eigs_r[neigs-1]; 7334 lambda_min = eigs_r[0]; 7335 if (pcbddc->use_coarse_estimates) { 7336 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7337 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7338 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7339 } 7340 } 7341 } 7342 } 7343 7344 /* check coarse problem residual error */ 7345 if (pcbddc->dbg_flag) { 7346 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7347 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7348 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7349 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7350 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7351 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7352 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7353 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7354 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7355 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7356 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7357 if (CoarseNullSpace) { 7358 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7359 } 7360 if (compute_eigs) { 7361 PetscReal lambda_max_s,lambda_min_s; 7362 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7363 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7364 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7365 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7366 for (i=0;i<neigs;i++) { 7367 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7368 } 7369 } 7370 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7371 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7372 } 7373 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7374 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7375 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7376 if (compute_eigs) { 7377 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7378 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7379 } 7380 } 7381 } 7382 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7383 /* print additional info */ 7384 if (pcbddc->dbg_flag) { 7385 /* waits until all processes reaches this point */ 7386 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7387 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7388 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7389 } 7390 7391 /* free memory */ 7392 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7393 PetscFunctionReturn(0); 7394 } 7395 7396 #undef __FUNCT__ 7397 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7398 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7399 { 7400 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7401 PC_IS* pcis = (PC_IS*)pc->data; 7402 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7403 IS subset,subset_mult,subset_n; 7404 PetscInt local_size,coarse_size=0; 7405 PetscInt *local_primal_indices=NULL; 7406 const PetscInt *t_local_primal_indices; 7407 PetscErrorCode ierr; 7408 7409 PetscFunctionBegin; 7410 /* Compute global number of coarse dofs */ 7411 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7412 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7413 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7414 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7415 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7416 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7417 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7418 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7419 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7420 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7421 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7422 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7423 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7424 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7425 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7426 7427 /* check numbering */ 7428 if (pcbddc->dbg_flag) { 7429 PetscScalar coarsesum,*array,*array2; 7430 PetscInt i; 7431 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7432 7433 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7434 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7435 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7436 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7437 /* counter */ 7438 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7439 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7440 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7441 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7442 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7443 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7444 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7445 for (i=0;i<pcbddc->local_primal_size;i++) { 7446 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7447 } 7448 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7449 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7450 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7451 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7452 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7453 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7454 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7455 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7456 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7457 for (i=0;i<pcis->n;i++) { 7458 if (array[i] != 0.0 && array[i] != array2[i]) { 7459 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7460 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7461 set_error = PETSC_TRUE; 7462 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7463 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7464 } 7465 } 7466 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7467 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7468 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7469 for (i=0;i<pcis->n;i++) { 7470 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7471 } 7472 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7473 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7474 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7475 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7476 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7477 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7478 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7479 PetscInt *gidxs; 7480 7481 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7482 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7483 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7484 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7486 for (i=0;i<pcbddc->local_primal_size;i++) { 7487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7488 } 7489 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7490 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7491 } 7492 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7493 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7494 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7495 } 7496 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7497 /* get back data */ 7498 *coarse_size_n = coarse_size; 7499 *local_primal_indices_n = local_primal_indices; 7500 PetscFunctionReturn(0); 7501 } 7502 7503 #undef __FUNCT__ 7504 #define __FUNCT__ "PCBDDCGlobalToLocal" 7505 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7506 { 7507 IS localis_t; 7508 PetscInt i,lsize,*idxs,n; 7509 PetscScalar *vals; 7510 PetscErrorCode ierr; 7511 7512 PetscFunctionBegin; 7513 /* get indices in local ordering exploiting local to global map */ 7514 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7515 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7516 for (i=0;i<lsize;i++) vals[i] = 1.0; 7517 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7518 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7519 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7520 if (idxs) { /* multilevel guard */ 7521 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7522 } 7523 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7524 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7525 ierr = PetscFree(vals);CHKERRQ(ierr); 7526 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7527 /* now compute set in local ordering */ 7528 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7529 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7530 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7531 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7532 for (i=0,lsize=0;i<n;i++) { 7533 if (PetscRealPart(vals[i]) > 0.5) { 7534 lsize++; 7535 } 7536 } 7537 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7538 for (i=0,lsize=0;i<n;i++) { 7539 if (PetscRealPart(vals[i]) > 0.5) { 7540 idxs[lsize++] = i; 7541 } 7542 } 7543 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7544 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7545 *localis = localis_t; 7546 PetscFunctionReturn(0); 7547 } 7548 7549 #undef __FUNCT__ 7550 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7551 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7552 { 7553 PC_IS *pcis=(PC_IS*)pc->data; 7554 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7555 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7556 Mat S_j; 7557 PetscInt *used_xadj,*used_adjncy; 7558 PetscBool free_used_adj; 7559 PetscErrorCode ierr; 7560 7561 PetscFunctionBegin; 7562 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7563 free_used_adj = PETSC_FALSE; 7564 if (pcbddc->sub_schurs_layers == -1) { 7565 used_xadj = NULL; 7566 used_adjncy = NULL; 7567 } else { 7568 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7569 used_xadj = pcbddc->mat_graph->xadj; 7570 used_adjncy = pcbddc->mat_graph->adjncy; 7571 } else if (pcbddc->computed_rowadj) { 7572 used_xadj = pcbddc->mat_graph->xadj; 7573 used_adjncy = pcbddc->mat_graph->adjncy; 7574 } else { 7575 PetscBool flg_row=PETSC_FALSE; 7576 const PetscInt *xadj,*adjncy; 7577 PetscInt nvtxs; 7578 7579 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7580 if (flg_row) { 7581 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7582 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7583 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7584 free_used_adj = PETSC_TRUE; 7585 } else { 7586 pcbddc->sub_schurs_layers = -1; 7587 used_xadj = NULL; 7588 used_adjncy = NULL; 7589 } 7590 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7591 } 7592 } 7593 7594 /* setup sub_schurs data */ 7595 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7596 if (!sub_schurs->schur_explicit) { 7597 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7598 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7599 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7600 } else { 7601 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7602 PetscBool isseqaij,need_change = PETSC_FALSE; 7603 PetscInt benign_n; 7604 Mat change = NULL; 7605 Vec scaling = NULL; 7606 IS change_primal = NULL; 7607 7608 if (!pcbddc->use_vertices && reuse_solvers) { 7609 PetscInt n_vertices; 7610 7611 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7612 reuse_solvers = (PetscBool)!n_vertices; 7613 } 7614 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7615 if (!isseqaij) { 7616 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7617 if (matis->A == pcbddc->local_mat) { 7618 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7619 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7620 } else { 7621 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7622 } 7623 } 7624 if (!pcbddc->benign_change_explicit) { 7625 benign_n = pcbddc->benign_n; 7626 } else { 7627 benign_n = 0; 7628 } 7629 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7630 We need a global reduction to avoid possible deadlocks. 7631 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7632 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7633 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7634 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7635 need_change = (PetscBool)(!need_change); 7636 } 7637 /* If the user defines additional constraints, we import them here. 7638 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7639 if (need_change) { 7640 PC_IS *pcisf; 7641 PC_BDDC *pcbddcf; 7642 PC pcf; 7643 7644 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7645 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7646 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7647 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7648 /* hacks */ 7649 pcisf = (PC_IS*)pcf->data; 7650 pcisf->is_B_local = pcis->is_B_local; 7651 pcisf->vec1_N = pcis->vec1_N; 7652 pcisf->BtoNmap = pcis->BtoNmap; 7653 pcisf->n = pcis->n; 7654 pcisf->n_B = pcis->n_B; 7655 pcbddcf = (PC_BDDC*)pcf->data; 7656 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7657 pcbddcf->mat_graph = pcbddc->mat_graph; 7658 pcbddcf->use_faces = PETSC_TRUE; 7659 pcbddcf->use_change_of_basis = PETSC_TRUE; 7660 pcbddcf->use_change_on_faces = PETSC_TRUE; 7661 pcbddcf->use_qr_single = PETSC_TRUE; 7662 pcbddcf->fake_change = PETSC_TRUE; 7663 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7664 /* store information on primal vertices and change of basis (in local numbering) */ 7665 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7666 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7667 change = pcbddcf->ConstraintMatrix; 7668 pcbddcf->ConstraintMatrix = NULL; 7669 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7670 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7671 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7672 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7673 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7674 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7675 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7676 pcf->ops->destroy = NULL; 7677 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7678 } 7679 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7680 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7681 ierr = MatDestroy(&change);CHKERRQ(ierr); 7682 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7683 } 7684 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7685 7686 /* free adjacency */ 7687 if (free_used_adj) { 7688 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7689 } 7690 PetscFunctionReturn(0); 7691 } 7692 7693 #undef __FUNCT__ 7694 #define __FUNCT__ "PCBDDCInitSubSchurs" 7695 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7696 { 7697 PC_IS *pcis=(PC_IS*)pc->data; 7698 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7699 PCBDDCGraph graph; 7700 PetscErrorCode ierr; 7701 7702 PetscFunctionBegin; 7703 /* attach interface graph for determining subsets */ 7704 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7705 IS verticesIS,verticescomm; 7706 PetscInt vsize,*idxs; 7707 7708 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7709 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7710 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7711 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7712 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7713 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7714 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7715 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7716 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7717 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7718 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7719 } else { 7720 graph = pcbddc->mat_graph; 7721 } 7722 /* print some info */ 7723 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 7724 IS vertices; 7725 PetscInt nv,nedges,nfaces; 7726 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7727 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7728 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7729 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7730 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7731 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7732 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7733 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7734 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7735 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7736 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7737 } 7738 7739 /* sub_schurs init */ 7740 if (!pcbddc->sub_schurs) { 7741 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7742 } 7743 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 7744 7745 /* free graph struct */ 7746 if (pcbddc->sub_schurs_rebuild) { 7747 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7748 } 7749 PetscFunctionReturn(0); 7750 } 7751 7752 #undef __FUNCT__ 7753 #define __FUNCT__ "PCBDDCCheckOperator" 7754 PetscErrorCode PCBDDCCheckOperator(PC pc) 7755 { 7756 PC_IS *pcis=(PC_IS*)pc->data; 7757 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7758 PetscErrorCode ierr; 7759 7760 PetscFunctionBegin; 7761 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7762 IS zerodiag = NULL; 7763 Mat S_j,B0_B=NULL; 7764 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7765 PetscScalar *p0_check,*array,*array2; 7766 PetscReal norm; 7767 PetscInt i; 7768 7769 /* B0 and B0_B */ 7770 if (zerodiag) { 7771 IS dummy; 7772 7773 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7774 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7775 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7776 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7777 } 7778 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7779 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7780 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7781 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7782 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7783 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7784 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7785 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7786 /* S_j */ 7787 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7788 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7789 7790 /* mimic vector in \widetilde{W}_\Gamma */ 7791 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7792 /* continuous in primal space */ 7793 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7794 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7795 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7796 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7797 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7798 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7799 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7800 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7801 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7802 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7803 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7804 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7805 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7806 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7807 7808 /* assemble rhs for coarse problem */ 7809 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7810 /* local with Schur */ 7811 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7812 if (zerodiag) { 7813 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7814 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7815 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7816 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7817 } 7818 /* sum on primal nodes the local contributions */ 7819 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7820 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7821 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7822 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7823 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7824 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7825 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7826 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7827 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7828 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7829 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7830 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7831 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7832 /* scale primal nodes (BDDC sums contibutions) */ 7833 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7834 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7835 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7836 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7837 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7838 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7839 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7840 /* global: \widetilde{B0}_B w_\Gamma */ 7841 if (zerodiag) { 7842 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7843 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7844 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7845 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7846 } 7847 /* BDDC */ 7848 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7849 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7850 7851 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7852 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7853 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7854 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7855 for (i=0;i<pcbddc->benign_n;i++) { 7856 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7857 } 7858 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7859 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7860 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7861 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7862 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7863 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7864 } 7865 PetscFunctionReturn(0); 7866 } 7867