1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 if (!esize) PetscFunctionReturn(0); 84 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 85 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 86 87 /* gradients */ 88 ptr = work + 5*esize; 89 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 90 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 91 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 92 ierr = MatDestroy(&GE);CHKERRQ(ierr); 93 94 /* constants */ 95 ptr += rsize*csize; 96 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 97 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 99 ierr = MatDestroy(&GE);CHKERRQ(ierr); 100 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 101 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PCBDDCNedelecSupport" 107 PetscErrorCode PCBDDCNedelecSupport(PC pc) 108 { 109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 110 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 111 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 112 MatNullSpace nnsp; 113 Vec tvec,*quads; 114 PetscSF sfv; 115 ISLocalToGlobalMapping el2g,vl2g,fl2g; 116 MPI_Comm comm; 117 IS lned,primals,allprimals,nedfieldlocal; 118 IS *eedges,*extrows,*extcols,*alleedges; 119 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee; 120 PetscScalar *vals,*work; 121 PetscReal *rwork; 122 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 123 PetscInt ne,nv,Lv,order,n,field; 124 PetscInt n_neigh,*neigh,*n_shared,**shared; 125 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscBool print,eerr,done,lrc[2],conforming; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* test variable order code and print debug info TODO: to be removed */ 133 print = PETSC_FALSE; 134 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 135 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 136 137 /* Return to caller if there are no edges in the decomposition */ 138 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 139 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 140 ierr = ISLocalToGlobalMappingGetSize(el2g,&n);CHKERRQ(ierr); 141 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 142 lrc[0] = PETSC_FALSE; 143 for (i=0;i<n;i++) { 144 if (PetscRealPart(vals[i]) > 2.) { 145 lrc[0] = PETSC_TRUE; 146 break; 147 } 148 } 149 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 150 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 151 if (!lrc[1]) PetscFunctionReturn(0); 152 153 /* Get discrete gradient 154 If it is defined for a subset of dofs, assumes G is given in global ordering for all the dofs */ 155 G = pcbddc->discretegradient; 156 order = pcbddc->nedorder; 157 conforming = pcbddc->conforming; 158 field = pcbddc->nedfield; 159 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 160 if (pcbddc->n_ISForDofsLocal && field > -1) { 161 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 162 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 163 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 164 } else { 165 PetscBool testnedfield = PETSC_FALSE; 166 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 167 if (!testnedfield) { 168 ne = n; 169 nedfieldlocal = NULL; 170 } else { 171 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 172 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 173 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 174 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 175 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 176 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 177 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 178 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 179 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 180 for (i=0,cum=0;i<n;i++) { 181 if (matis->sf_leafdata[i] > 1) { 182 matis->sf_leafdata[cum++] = i; 183 } 184 } 185 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 186 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 187 } 188 } 189 190 if (nedfieldlocal) { /* merge with previous code when testing is done */ 191 IS is; 192 193 /* need to map from the local Nedelec field to local numbering */ 194 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 195 /* need to map from the local Nedelec field to global numbering */ 196 ierr = ISLocalToGlobalMappingApplyIS(el2g,nedfieldlocal,&is);CHKERRQ(ierr); 197 ierr = ISLocalToGlobalMappingCreateIS(is,&el2g);CHKERRQ(ierr); 198 ierr = ISDestroy(&is);CHKERRQ(ierr); 199 } else { 200 ierr = PetscObjectReference((PetscObject)el2g);CHKERRQ(ierr); 201 fl2g = NULL; 202 } 203 204 /* Sanity checks */ 205 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 206 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 207 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 208 209 /* Drop connections for interior edges (this modifies G) */ 210 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 211 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 212 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 213 if (nedfieldlocal) { 214 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 215 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 216 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 217 } else { 218 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 219 } 220 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 221 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 222 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 223 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 224 if (matis->sf_rootdata[i] < 2) { 225 matis->sf_rootdata[cum++] = i + rst; 226 } 227 } 228 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 229 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 230 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 231 232 /* Extract subdomain relevant rows of G */ 233 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 234 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 235 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 236 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 237 ierr = ISDestroy(&lned);CHKERRQ(ierr); 238 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 239 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 240 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 241 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 242 if (print) { 243 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 244 ierr = MatView(lG,NULL);CHKERRQ(ierr); 245 } 246 247 /* SF for nodal communications */ 248 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 249 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 250 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 251 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 252 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 253 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 254 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 257 258 /* Destroy temporary G created in MATIS format */ 259 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 260 261 /* Save lG */ 262 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 263 264 /* Analyze the edge-nodes connections (duplicate lG) */ 265 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 266 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 267 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 268 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 269 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 270 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 271 /* need to import the boundary specification to ensure the 272 proper detection of coarse edges' endpoints */ 273 if (pcbddc->DirichletBoundariesLocal) { 274 IS is; 275 276 if (fl2g) { 277 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 278 } else { 279 is = pcbddc->DirichletBoundariesLocal; 280 } 281 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 282 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 283 for (i=0;i<cum;i++) { 284 if (idxs[i] >= 0) { 285 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 286 } 287 } 288 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 289 if (fl2g) { 290 ierr = ISDestroy(&is);CHKERRQ(ierr); 291 } 292 } 293 if (pcbddc->NeumannBoundariesLocal) { 294 IS is; 295 296 if (fl2g) { 297 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 298 } else { 299 is = pcbddc->NeumannBoundariesLocal; 300 } 301 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 302 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 303 for (i=0;i<cum;i++) { 304 if (idxs[i] >= 0) { 305 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 306 } 307 } 308 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 309 if (fl2g) { 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } 312 } 313 314 /* need to remove coarse faces' dofs to ensure the 315 proper detection of coarse edges' endpoints */ 316 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 317 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 318 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 319 for (i=1;i<n_neigh;i++) 320 for (j=0;j<n_shared[i];j++) 321 marks[shared[i][j]]++; 322 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) { 324 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 325 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 326 } 327 } 328 329 if (!conforming) { 330 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 331 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 332 } 333 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 334 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 335 cum = 0; 336 for (i=0;i<ne;i++) { 337 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 338 if (!PetscBTLookup(btee,i)) { 339 marks[cum++] = i; 340 continue; 341 } 342 /* set badly connected edge dofs as primal */ 343 if (!conforming) { 344 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 345 marks[cum++] = i; 346 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 347 for (j=ii[i];j<ii[i+1];j++) { 348 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 349 } 350 } else { 351 /* every edge dofs should be connected trough a certain number of nodal dofs 352 to other edge dofs belonging to coarse edges 353 - at most 2 endpoints 354 - order-1 interior nodal dofs 355 - no undefined nodal dofs (nconn < order) 356 */ 357 PetscInt ends = 0,ints = 0, undef = 0; 358 for (j=ii[i];j<ii[i+1];j++) { 359 PetscInt v = jj[j],k; 360 PetscInt nconn = iit[v+1]-iit[v]; 361 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 362 if (nconn > order) ends++; 363 else if (nconn == order) ints++; 364 else undef++; 365 } 366 if (undef || ends > 2 || ints != order -1) { 367 marks[cum++] = i; 368 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 369 for (j=ii[i];j<ii[i+1];j++) { 370 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 371 } 372 } 373 } 374 } 375 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 376 if (!order && ii[i+1] != ii[i]) { 377 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 378 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 379 } 380 } 381 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 382 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 383 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 384 if (!conforming) { 385 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 386 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 387 } 388 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 389 /* identify splitpoints and corner candidates: TODO variable order */ 390 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 391 if (print) { 392 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 393 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 394 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 395 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 396 } 397 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 398 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 399 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 400 for (i=0;i<nv;i++) { 401 PetscInt ord = order, test = ii[i+1]-ii[i]; 402 if (!order) { 403 PetscReal vorder = 0.; 404 405 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 406 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 407 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 408 ord = 1; 409 } 410 #if defined(PETSC_USE_DEBUG) 411 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 412 #endif 413 if (test >= 3*ord) { /* splitpoints */ 414 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 415 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 416 } else if (test == ord) { 417 if (order == 1) { 418 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 419 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 420 } else { 421 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 422 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 423 } 424 } 425 } 426 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 427 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 428 429 /* Get the local G^T explicitly */ 430 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 431 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 432 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 433 434 /* Mark interior nodal dofs */ 435 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 436 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 437 for (i=1;i<n_neigh;i++) { 438 for (j=0;j<n_shared[i];j++) { 439 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 440 } 441 } 442 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 443 444 /* communicate corners and splitpoints */ 445 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 446 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 447 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 448 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 449 450 if (print) { 451 IS tbz; 452 453 cum = 0; 454 for (i=0;i<nv;i++) 455 if (sfvleaves[i]) 456 vmarks[cum++] = i; 457 458 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 459 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 460 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 461 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 462 } 463 464 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 465 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 466 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 467 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 468 469 /* Zero rows of lGt corresponding to identified corners 470 and interior nodal dofs */ 471 cum = 0; 472 for (i=0;i<nv;i++) { 473 if (sfvleaves[i]) { 474 vmarks[cum++] = i; 475 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 476 } 477 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 478 } 479 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 480 if (print) { 481 IS tbz; 482 483 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 484 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 485 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 486 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 487 } 488 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 489 ierr = PetscFree(vmarks);CHKERRQ(ierr); 490 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 491 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 492 493 /* Recompute G */ 494 ierr = MatDestroy(&lG);CHKERRQ(ierr); 495 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 496 if (print) { 497 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 498 ierr = MatView(lG,NULL);CHKERRQ(ierr); 499 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 500 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 501 } 502 503 /* Get primal dofs (if any) */ 504 cum = 0; 505 for (i=0;i<ne;i++) { 506 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 507 } 508 if (fl2g) { 509 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 510 } 511 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 512 if (print) { 513 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 514 ierr = ISView(primals,NULL);CHKERRQ(ierr); 515 } 516 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 517 /* TODO: what if the user passed in some of them ? */ 518 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 519 ierr = ISDestroy(&primals);CHKERRQ(ierr); 520 521 /* Compute edge connectivity */ 522 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 523 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 524 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 525 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 526 if (fl2g) { 527 PetscBT btf; 528 PetscInt *iia,*jja,*iiu,*jju; 529 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 530 531 /* create CSR for all local dofs */ 532 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 533 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 534 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 535 iiu = pcbddc->mat_graph->xadj; 536 jju = pcbddc->mat_graph->adjncy; 537 } else if (pcbddc->use_local_adj) { 538 rest = PETSC_TRUE; 539 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 540 } else { 541 free = PETSC_TRUE; 542 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 543 iiu[0] = 0; 544 for (i=0;i<n;i++) { 545 iiu[i+1] = i+1; 546 jju[i] = -1; 547 } 548 } 549 550 /* import sizes of CSR */ 551 iia[0] = 0; 552 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 553 554 /* overwrite entries corresponding to the Nedelec field */ 555 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 556 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 557 for (i=0;i<ne;i++) { 558 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 559 iia[idxs[i]+1] = ii[i+1]-ii[i]; 560 } 561 562 /* iia in CSR */ 563 for (i=0;i<n;i++) iia[i+1] += iia[i]; 564 565 /* jja in CSR */ 566 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 567 for (i=0;i<n;i++) 568 if (!PetscBTLookup(btf,i)) 569 for (j=0;j<iiu[i+1]-iiu[i];j++) 570 jja[iia[i]+j] = jju[iiu[i]+j]; 571 572 /* map edge dofs connectivity */ 573 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 574 for (i=0;i<ne;i++) { 575 PetscInt e = idxs[i]; 576 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 577 } 578 579 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 580 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 581 if (rest) { 582 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 583 } 584 if (free) { 585 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 586 } 587 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 588 } else { 589 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 590 } 591 } 592 593 /* Analyze interface for edge dofs */ 594 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 595 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 596 597 /* Get coarse edges in the edge space */ 598 pcbddc->mat_graph->twodim = PETSC_FALSE; 599 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 600 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 601 602 if (fl2g) { 603 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 604 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 605 for (i=0;i<nee;i++) { 606 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 607 } 608 } else { 609 eedges = alleedges; 610 primals = allprimals; 611 } 612 613 /* Mark fine edge dofs with their coarse edge id */ 614 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 615 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 616 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 617 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 618 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 619 if (print) { 620 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 621 ierr = ISView(primals,NULL);CHKERRQ(ierr); 622 } 623 624 maxsize = 0; 625 for (i=0;i<nee;i++) { 626 PetscInt size,mark = i+1; 627 628 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 629 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 630 for (j=0;j<size;j++) marks[idxs[j]] = mark; 631 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 632 maxsize = PetscMax(maxsize,size); 633 } 634 635 /* Find coarse edge endpoints */ 636 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 637 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 638 for (i=0;i<nee;i++) { 639 PetscInt mark = i+1,size; 640 641 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 642 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 643 if (print) { 644 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 645 ISView(eedges[i],NULL); 646 } 647 for (j=0;j<size;j++) { 648 PetscInt k, ee = idxs[j]; 649 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 650 for (k=ii[ee];k<ii[ee+1];k++) { 651 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 652 if (PetscBTLookup(btv,jj[k])) { 653 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 654 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 655 PetscInt k2; 656 PetscBool corner = PETSC_FALSE; 657 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 658 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 659 /* it's a corner if either is connected with an edge dof belonging to a different cc or 660 if the edge dof lie on the natural part of the boundary */ 661 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 662 corner = PETSC_TRUE; 663 break; 664 } 665 } 666 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 667 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 668 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 669 } else { 670 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 671 } 672 } 673 } 674 } 675 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 676 } 677 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 678 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 679 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 680 681 /* Reset marked primal dofs */ 682 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 683 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 684 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 685 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 686 687 /* Now use the initial lG */ 688 ierr = MatDestroy(&lG);CHKERRQ(ierr); 689 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 690 lG = lGinit; 691 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 692 693 /* Compute extended cols indices */ 694 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 695 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 696 i *= maxsize; 697 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 698 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 699 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 700 eerr = PETSC_FALSE; 701 for (i=0;i<nee;i++) { 702 PetscInt size; 703 704 cum = 0; 705 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 706 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 707 for (j=0;j<size;j++) { 708 PetscInt k,ee = idxs[j]; 709 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 710 } 711 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 712 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 713 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 714 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 715 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 716 /* it may happen that endpoints are not defined at this point 717 if it is the case, mark this edge for a second pass */ 718 if (cum != size -1) { 719 emarks[i] = 1; 720 if (print) { 721 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 722 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 723 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 724 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 725 } 726 eerr = PETSC_TRUE; 727 } 728 } 729 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 730 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 731 if (done) { 732 PetscInt *newprimals; 733 734 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 735 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 736 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 737 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 738 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 739 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 740 for (i=0;i<nee;i++) { 741 if (emarks[i]) { 742 PetscInt size,mark = i+1; 743 744 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 745 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 746 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 747 for (j=0;j<size;j++) { 748 PetscInt k,ee = idxs[j]; 749 for (k=ii[ee];k<ii[ee+1];k++) { 750 /* set all candidates located on the edge as corners */ 751 if (PetscBTLookup(btvcand,jj[k])) { 752 PetscInt k2,vv = jj[k]; 753 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 754 /* set all edge dofs connected to candidate as primals */ 755 for (k2=iit[vv];k2<iit[vv+1];k2++) { 756 if (marks[jjt[k2]] == mark) { 757 PetscInt k3,ee2 = jjt[k2]; 758 newprimals[cum++] = ee2; 759 /* finally set the new corners */ 760 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 761 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 762 } 763 } 764 } 765 } 766 } 767 } 768 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 769 } 770 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 771 } 772 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 773 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 774 if (fl2g) { 775 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 776 ierr = ISDestroy(&primals);CHKERRQ(ierr); 777 for (i=0;i<nee;i++) { 778 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 779 } 780 ierr = PetscFree(eedges);CHKERRQ(ierr); 781 } 782 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 783 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 784 ierr = PetscFree(newprimals);CHKERRQ(ierr); 785 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 786 ierr = ISDestroy(&primals);CHKERRQ(ierr); 787 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 788 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 789 if (fl2g) { 790 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 791 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 792 for (i=0;i<nee;i++) { 793 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 794 } 795 } else { 796 eedges = alleedges; 797 primals = allprimals; 798 } 799 800 /* Mark again */ 801 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 802 for (i=0;i<nee;i++) { 803 PetscInt size,mark = i+1; 804 805 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 806 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 807 for (j=0;j<size;j++) marks[idxs[j]] = mark; 808 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 809 } 810 if (print) { 811 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 812 ierr = ISView(primals,NULL);CHKERRQ(ierr); 813 } 814 815 /* Recompute extended cols */ 816 eerr = PETSC_FALSE; 817 for (i=0;i<nee;i++) { 818 PetscInt size; 819 820 cum = 0; 821 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 822 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 823 for (j=0;j<size;j++) { 824 PetscInt k,ee = idxs[j]; 825 for (k=ii[ee];k<ii[ee+1];k++) { 826 if (!PetscBTLookup(btv,jj[k])) { 827 extrow[cum++] = jj[k]; 828 } 829 } 830 } 831 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 832 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 833 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 834 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 835 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 836 if (cum != size -1) { 837 if (print) { 838 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 839 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 840 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 841 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 842 } 843 eerr = PETSC_TRUE; 844 } 845 } 846 } 847 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 848 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 849 ierr = PetscFree(emarks);CHKERRQ(ierr); 850 /* an error should not occur at this point */ 851 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 852 853 /* Check the number of endpoints */ 854 /* TODO: fix case for circular edge */ 855 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 856 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 857 for (i=0;i<nee;i++) { 858 PetscInt size, found = 0; 859 860 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 861 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 862 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 863 for (j=0;j<size;j++) { 864 PetscInt k,ee = idxs[j]; 865 for (k=ii[ee];k<ii[ee+1];k++) { 866 PetscInt vv = jj[k]; 867 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 868 found++; 869 } 870 } 871 } 872 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 873 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 874 } 875 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 876 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 877 878 #if defined(PETSC_USE_DEBUG) 879 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 880 not interfere with neighbouring coarse edges */ 881 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 883 for (i=0;i<nv;i++) { 884 PetscInt emax = 0,eemax = 0; 885 886 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 887 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 888 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 889 for (j=1;j<nee+1;j++) { 890 if (emax < emarks[j]) { 891 emax = emarks[j]; 892 eemax = j; 893 } 894 } 895 /* not relevant for edges */ 896 if (!eemax) continue; 897 898 for (j=ii[i];j<ii[i+1];j++) { 899 if (marks[jj[j]] && marks[jj[j]] != eemax) { 900 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 901 } 902 } 903 } 904 ierr = PetscFree(emarks);CHKERRQ(ierr); 905 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 906 #endif 907 908 /* Compute extended rows indices for edge blocks of the change of basis */ 909 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 910 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 911 extmem *= maxsize; 912 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 913 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 914 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 915 for (i=0;i<nv;i++) { 916 PetscInt mark = 0,size,start; 917 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 918 for (j=ii[i];j<ii[i+1];j++) 919 if (marks[jj[j]] && !mark) 920 mark = marks[jj[j]]; 921 922 /* not relevant */ 923 if (!mark) continue; 924 925 /* import extended row */ 926 mark--; 927 start = mark*extmem+extrowcum[mark]; 928 size = ii[i+1]-ii[i]; 929 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 930 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 931 extrowcum[mark] += size; 932 } 933 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 934 cum = 0; 935 for (i=0;i<nee;i++) { 936 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 937 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 938 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 939 cum = PetscMax(cum,size); 940 } 941 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 942 ierr = PetscFree(marks);CHKERRQ(ierr); 943 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 944 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 945 946 /* Workspace for lapack inner calls and VecSetValues */ 947 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 948 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 949 for (i=0;i<maxsize;i++) vals[i] = 1.; 950 951 /* Create vectors for quadrature rules */ 952 /* TODO preserve other quadratures */ 953 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 954 for (i=0;i<nquads;i++) { 955 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 956 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 957 } 958 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 959 960 /* Create change of basis matrix (preallocation can be improved) */ 961 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 962 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 963 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 964 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 965 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 966 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 967 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 968 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 969 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 970 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 971 972 /* Defaults to identity */ 973 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 974 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 975 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 976 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 977 978 for (i=0;i<nee;i++) { 979 Mat Gins = NULL, GKins = NULL; 980 981 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 982 if (Gins && GKins) { 983 PetscScalar *data; 984 const PetscInt *rows,*cols; 985 PetscInt nrh,nch,nrc,ncc; 986 987 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 988 /* H1 */ 989 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 990 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 991 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 992 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 993 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 994 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 995 /* complement */ 996 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 997 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 998 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 999 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1000 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1001 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1002 /* Gins kernel quadratures */ 1003 for (j=0;j<ncc;j++) { 1004 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1005 } 1006 /* H1 average */ 1007 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1008 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1009 } 1010 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1011 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1012 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1013 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1014 } 1015 1016 /* Start assembling */ 1017 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1018 for (i=0;i<nquads;i++) { 1019 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1020 } 1021 1022 /* Free */ 1023 if (fl2g) { 1024 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1025 for (i=0;i<nee;i++) { 1026 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1027 } 1028 ierr = PetscFree(eedges);CHKERRQ(ierr); 1029 } 1030 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1031 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1032 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1033 ierr = PetscFree(extrow);CHKERRQ(ierr); 1034 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1035 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1036 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1037 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1038 ierr = PetscFree(vals);CHKERRQ(ierr); 1039 ierr = PetscFree(extrows);CHKERRQ(ierr); 1040 ierr = PetscFree(extcols);CHKERRQ(ierr); 1041 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1042 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1043 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1044 1045 /* Complete assembling */ 1046 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1047 for (i=0;i<nquads;i++) { 1048 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1049 } 1050 for (i=0;i<nquads;i++) { 1051 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1052 } 1053 ierr = PetscFree(quads);CHKERRQ(ierr); 1054 1055 /* set change of basis */ 1056 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1057 ierr = MatDestroy(&T);CHKERRQ(ierr); 1058 1059 /* set quadratures */ 1060 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1061 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1062 1063 PetscFunctionReturn(0); 1064 } 1065 1066 /* the near-null space of BDDC carries information on quadrature weights, 1067 and these can be collinear -> so cheat with MatNullSpaceCreate 1068 and create a suitable set of basis vectors first */ 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1071 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1072 { 1073 PetscErrorCode ierr; 1074 PetscInt i; 1075 1076 PetscFunctionBegin; 1077 for (i=0;i<nvecs;i++) { 1078 PetscInt first,last; 1079 1080 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1081 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1082 if (i>=first && i < last) { 1083 PetscScalar *data; 1084 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1085 if (!has_const) { 1086 data[i-first] = 1.; 1087 } else { 1088 data[2*i-first] = 1./PetscSqrtReal(2.); 1089 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1090 } 1091 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1092 } 1093 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1094 } 1095 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1096 for (i=0;i<nvecs;i++) { /* reset vectors */ 1097 PetscInt first,last; 1098 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1099 if (i>=first && i < last) { 1100 PetscScalar *data; 1101 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1102 if (!has_const) { 1103 data[i-first] = 0.; 1104 } else { 1105 data[2*i-first] = 0.; 1106 data[2*i-first+1] = 0.; 1107 } 1108 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1109 } 1110 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1117 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1118 { 1119 Mat loc_divudotp; 1120 Vec p,v,vins,quad_vec,*quad_vecs; 1121 ISLocalToGlobalMapping map; 1122 IS *faces,*edges; 1123 PetscScalar *vals; 1124 const PetscScalar *array; 1125 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1126 PetscMPIInt rank; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1131 if (graph->twodim) { 1132 lmaxneighs = 2; 1133 } else { 1134 lmaxneighs = 1; 1135 for (i=0;i<ne;i++) { 1136 const PetscInt *idxs; 1137 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1138 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1139 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1140 } 1141 lmaxneighs++; /* graph count does not include self */ 1142 } 1143 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1144 maxsize = 0; 1145 for (i=0;i<ne;i++) { 1146 PetscInt nn; 1147 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1148 maxsize = PetscMax(maxsize,nn); 1149 } 1150 for (i=0;i<nf;i++) { 1151 PetscInt nn; 1152 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1153 maxsize = PetscMax(maxsize,nn); 1154 } 1155 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1156 /* create vectors to hold quadrature weights */ 1157 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1158 if (!transpose) { 1159 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1160 } else { 1161 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1162 } 1163 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1164 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1165 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1166 for (i=0;i<maxneighs;i++) { 1167 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1168 } 1169 1170 /* compute local quad vec */ 1171 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1172 if (!transpose) { 1173 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1174 } else { 1175 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1176 } 1177 ierr = VecSet(p,1.);CHKERRQ(ierr); 1178 if (!transpose) { 1179 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1180 } else { 1181 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1182 } 1183 if (vl2l) { 1184 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1185 } else { 1186 vins = v; 1187 } 1188 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1189 ierr = VecDestroy(&p);CHKERRQ(ierr); 1190 1191 /* insert in global quadrature vecs */ 1192 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1193 for (i=0;i<nf;i++) { 1194 const PetscInt *idxs; 1195 PetscInt idx,nn,j; 1196 1197 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1198 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1199 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1200 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1201 idx = -(idx+1); 1202 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1203 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1204 } 1205 for (i=0;i<ne;i++) { 1206 const PetscInt *idxs; 1207 PetscInt idx,nn,j; 1208 1209 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1210 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1211 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1212 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1213 idx = -(idx+1); 1214 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1215 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1216 } 1217 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1218 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1219 if (vl2l) { 1220 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1221 } 1222 ierr = VecDestroy(&v);CHKERRQ(ierr); 1223 ierr = PetscFree(vals);CHKERRQ(ierr); 1224 1225 /* assemble near null space */ 1226 for (i=0;i<maxneighs;i++) { 1227 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1228 } 1229 for (i=0;i<maxneighs;i++) { 1230 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1231 } 1232 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1239 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1240 { 1241 PetscErrorCode ierr; 1242 Vec local,global; 1243 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1244 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1245 1246 PetscFunctionBegin; 1247 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1248 /* need to convert from global to local topology information and remove references to information in global ordering */ 1249 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1250 if (pcbddc->user_provided_isfordofs) { 1251 if (pcbddc->n_ISForDofs) { 1252 PetscInt i; 1253 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1254 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1255 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1256 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1257 } 1258 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1259 pcbddc->n_ISForDofs = 0; 1260 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1261 } 1262 } else { 1263 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1264 PetscInt i, n = matis->A->rmap->n; 1265 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1266 if (i > 1) { 1267 pcbddc->n_ISForDofsLocal = i; 1268 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1269 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1270 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1271 } 1272 } 1273 } 1274 } 1275 1276 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1277 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1278 } 1279 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1280 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1281 } 1282 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1283 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1284 } 1285 ierr = VecDestroy(&global);CHKERRQ(ierr); 1286 ierr = VecDestroy(&local);CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1292 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1293 { 1294 PC_IS *pcis = (PC_IS*)(pc->data); 1295 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1296 PetscErrorCode ierr; 1297 1298 PetscFunctionBegin; 1299 if (!pcbddc->benign_have_null) { 1300 PetscFunctionReturn(0); 1301 } 1302 if (pcbddc->ChangeOfBasisMatrix) { 1303 Vec swap; 1304 1305 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1306 swap = pcbddc->work_change; 1307 pcbddc->work_change = r; 1308 r = swap; 1309 } 1310 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1311 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1312 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1313 ierr = VecSet(z,0.);CHKERRQ(ierr); 1314 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1315 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1316 if (pcbddc->ChangeOfBasisMatrix) { 1317 Vec swap; 1318 1319 swap = r; 1320 r = pcbddc->work_change; 1321 pcbddc->work_change = swap; 1322 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1323 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1324 } 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1330 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1331 { 1332 PCBDDCBenignMatMult_ctx ctx; 1333 PetscErrorCode ierr; 1334 PetscBool apply_right,apply_left,reset_x; 1335 1336 PetscFunctionBegin; 1337 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1338 if (transpose) { 1339 apply_right = ctx->apply_left; 1340 apply_left = ctx->apply_right; 1341 } else { 1342 apply_right = ctx->apply_right; 1343 apply_left = ctx->apply_left; 1344 } 1345 reset_x = PETSC_FALSE; 1346 if (apply_right) { 1347 const PetscScalar *ax; 1348 PetscInt nl,i; 1349 1350 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1351 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1352 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1353 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1354 for (i=0;i<ctx->benign_n;i++) { 1355 PetscScalar sum,val; 1356 const PetscInt *idxs; 1357 PetscInt nz,j; 1358 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1359 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1360 sum = 0.; 1361 if (ctx->apply_p0) { 1362 val = ctx->work[idxs[nz-1]]; 1363 for (j=0;j<nz-1;j++) { 1364 sum += ctx->work[idxs[j]]; 1365 ctx->work[idxs[j]] += val; 1366 } 1367 } else { 1368 for (j=0;j<nz-1;j++) { 1369 sum += ctx->work[idxs[j]]; 1370 } 1371 } 1372 ctx->work[idxs[nz-1]] -= sum; 1373 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1374 } 1375 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1376 reset_x = PETSC_TRUE; 1377 } 1378 if (transpose) { 1379 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1380 } else { 1381 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1382 } 1383 if (reset_x) { 1384 ierr = VecResetArray(x);CHKERRQ(ierr); 1385 } 1386 if (apply_left) { 1387 PetscScalar *ay; 1388 PetscInt i; 1389 1390 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1391 for (i=0;i<ctx->benign_n;i++) { 1392 PetscScalar sum,val; 1393 const PetscInt *idxs; 1394 PetscInt nz,j; 1395 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1396 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1397 val = -ay[idxs[nz-1]]; 1398 if (ctx->apply_p0) { 1399 sum = 0.; 1400 for (j=0;j<nz-1;j++) { 1401 sum += ay[idxs[j]]; 1402 ay[idxs[j]] += val; 1403 } 1404 ay[idxs[nz-1]] += sum; 1405 } else { 1406 for (j=0;j<nz-1;j++) { 1407 ay[idxs[j]] += val; 1408 } 1409 ay[idxs[nz-1]] = 0.; 1410 } 1411 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1412 } 1413 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1414 } 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1420 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1421 { 1422 PetscErrorCode ierr; 1423 1424 PetscFunctionBegin; 1425 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1431 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1432 { 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "PCBDDCBenignShellMat" 1442 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1443 { 1444 PC_IS *pcis = (PC_IS*)pc->data; 1445 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1446 PCBDDCBenignMatMult_ctx ctx; 1447 PetscErrorCode ierr; 1448 1449 PetscFunctionBegin; 1450 if (!restore) { 1451 Mat A_IB,A_BI; 1452 PetscScalar *work; 1453 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1454 1455 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1456 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1457 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1458 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1459 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1460 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1461 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1462 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1463 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1464 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1465 ctx->apply_left = PETSC_TRUE; 1466 ctx->apply_right = PETSC_FALSE; 1467 ctx->apply_p0 = PETSC_FALSE; 1468 ctx->benign_n = pcbddc->benign_n; 1469 if (reuse) { 1470 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1471 ctx->free = PETSC_FALSE; 1472 } else { /* TODO: could be optimized for successive solves */ 1473 ISLocalToGlobalMapping N_to_D; 1474 PetscInt i; 1475 1476 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1477 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1478 for (i=0;i<pcbddc->benign_n;i++) { 1479 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1480 } 1481 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1482 ctx->free = PETSC_TRUE; 1483 } 1484 ctx->A = pcis->A_IB; 1485 ctx->work = work; 1486 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1487 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1488 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1489 pcis->A_IB = A_IB; 1490 1491 /* A_BI as A_IB^T */ 1492 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1493 pcbddc->benign_original_mat = pcis->A_BI; 1494 pcis->A_BI = A_BI; 1495 } else { 1496 if (!pcbddc->benign_original_mat) { 1497 PetscFunctionReturn(0); 1498 } 1499 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1500 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1501 pcis->A_IB = ctx->A; 1502 ctx->A = NULL; 1503 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1504 pcis->A_BI = pcbddc->benign_original_mat; 1505 pcbddc->benign_original_mat = NULL; 1506 if (ctx->free) { 1507 PetscInt i; 1508 for (i=0;i<ctx->benign_n;i++) { 1509 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1510 } 1511 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1512 } 1513 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1514 ierr = PetscFree(ctx);CHKERRQ(ierr); 1515 } 1516 PetscFunctionReturn(0); 1517 } 1518 1519 /* used just in bddc debug mode */ 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "PCBDDCBenignProject" 1522 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1523 { 1524 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1525 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1526 Mat An; 1527 PetscErrorCode ierr; 1528 1529 PetscFunctionBegin; 1530 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1531 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1532 if (is1) { 1533 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1534 ierr = MatDestroy(&An);CHKERRQ(ierr); 1535 } else { 1536 *B = An; 1537 } 1538 PetscFunctionReturn(0); 1539 } 1540 1541 /* TODO: add reuse flag */ 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatSeqAIJCompress" 1544 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1545 { 1546 Mat Bt; 1547 PetscScalar *a,*bdata; 1548 const PetscInt *ii,*ij; 1549 PetscInt m,n,i,nnz,*bii,*bij; 1550 PetscBool flg_row; 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1555 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1556 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1557 nnz = n; 1558 for (i=0;i<ii[n];i++) { 1559 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1560 } 1561 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1562 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1563 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1564 nnz = 0; 1565 bii[0] = 0; 1566 for (i=0;i<n;i++) { 1567 PetscInt j; 1568 for (j=ii[i];j<ii[i+1];j++) { 1569 PetscScalar entry = a[j]; 1570 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1571 bij[nnz] = ij[j]; 1572 bdata[nnz] = entry; 1573 nnz++; 1574 } 1575 } 1576 bii[i+1] = nnz; 1577 } 1578 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1579 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1580 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1581 { 1582 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1583 b->free_a = PETSC_TRUE; 1584 b->free_ij = PETSC_TRUE; 1585 } 1586 *B = Bt; 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatDetectDisconnectedComponents" 1592 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1593 { 1594 Mat B; 1595 IS is_dummy,*cc_n; 1596 ISLocalToGlobalMapping l2gmap_dummy; 1597 PCBDDCGraph graph; 1598 PetscInt i,n; 1599 PetscInt *xadj,*adjncy; 1600 PetscInt *xadj_filtered,*adjncy_filtered; 1601 PetscBool flg_row,isseqaij; 1602 PetscErrorCode ierr; 1603 1604 PetscFunctionBegin; 1605 if (!A->rmap->N || !A->cmap->N) { 1606 *ncc = 0; 1607 *cc = NULL; 1608 PetscFunctionReturn(0); 1609 } 1610 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1611 if (!isseqaij && filter) { 1612 PetscBool isseqdense; 1613 1614 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1615 if (!isseqdense) { 1616 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1617 } else { /* TODO: rectangular case and LDA */ 1618 PetscScalar *array; 1619 PetscReal chop=1.e-6; 1620 1621 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1622 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1623 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1624 for (i=0;i<n;i++) { 1625 PetscInt j; 1626 for (j=i+1;j<n;j++) { 1627 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1628 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1629 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1630 } 1631 } 1632 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1633 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1634 } 1635 } else { 1636 B = A; 1637 } 1638 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1639 1640 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1641 if (filter) { 1642 PetscScalar *data; 1643 PetscInt j,cum; 1644 1645 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1646 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1647 cum = 0; 1648 for (i=0;i<n;i++) { 1649 PetscInt t; 1650 1651 for (j=xadj[i];j<xadj[i+1];j++) { 1652 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1653 continue; 1654 } 1655 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1656 } 1657 t = xadj_filtered[i]; 1658 xadj_filtered[i] = cum; 1659 cum += t; 1660 } 1661 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1662 } else { 1663 xadj_filtered = NULL; 1664 adjncy_filtered = NULL; 1665 } 1666 1667 /* compute local connected components using PCBDDCGraph */ 1668 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1669 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1670 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1671 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1672 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1673 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1674 if (xadj_filtered) { 1675 graph->xadj = xadj_filtered; 1676 graph->adjncy = adjncy_filtered; 1677 } else { 1678 graph->xadj = xadj; 1679 graph->adjncy = adjncy; 1680 } 1681 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1682 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1683 /* partial clean up */ 1684 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1685 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1686 if (A != B) { 1687 ierr = MatDestroy(&B);CHKERRQ(ierr); 1688 } 1689 1690 /* get back data */ 1691 if (ncc) *ncc = graph->ncc; 1692 if (cc) { 1693 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1694 for (i=0;i<graph->ncc;i++) { 1695 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1696 } 1697 *cc = cc_n; 1698 } 1699 /* clean up graph */ 1700 graph->xadj = 0; 1701 graph->adjncy = 0; 1702 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "PCBDDCBenignCheck" 1708 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1709 { 1710 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1711 PC_IS* pcis = (PC_IS*)(pc->data); 1712 IS dirIS = NULL; 1713 PetscInt i; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1718 if (zerodiag) { 1719 Mat A; 1720 Vec vec3_N; 1721 PetscScalar *vals; 1722 const PetscInt *idxs; 1723 PetscInt nz,*count; 1724 1725 /* p0 */ 1726 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1727 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1728 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1729 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1730 for (i=0;i<nz;i++) vals[i] = 1.; 1731 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1732 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1733 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1734 /* v_I */ 1735 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1736 for (i=0;i<nz;i++) vals[i] = 0.; 1737 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1738 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1739 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1740 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1741 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1742 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1743 if (dirIS) { 1744 PetscInt n; 1745 1746 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1747 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1748 for (i=0;i<n;i++) vals[i] = 0.; 1749 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1750 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1751 } 1752 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1753 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1754 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1755 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1756 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1757 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1758 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1759 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1760 ierr = PetscFree(vals);CHKERRQ(ierr); 1761 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1762 1763 /* there should not be any pressure dofs lying on the interface */ 1764 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1765 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1766 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1767 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1768 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1769 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1770 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1771 ierr = PetscFree(count);CHKERRQ(ierr); 1772 } 1773 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1774 1775 /* check PCBDDCBenignGetOrSetP0 */ 1776 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1777 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1778 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1779 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1780 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1781 for (i=0;i<pcbddc->benign_n;i++) { 1782 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1783 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1784 } 1785 PetscFunctionReturn(0); 1786 } 1787 1788 #undef __FUNCT__ 1789 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1790 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1791 { 1792 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1793 IS pressures,zerodiag,*zerodiag_subs; 1794 PetscInt nz,n; 1795 PetscInt *interior_dofs,n_interior_dofs; 1796 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1801 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1802 for (n=0;n<pcbddc->benign_n;n++) { 1803 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1804 } 1805 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1806 pcbddc->benign_n = 0; 1807 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1808 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1809 Checks if all the pressure dofs in each subdomain have a zero diagonal 1810 If not, a change of basis on pressures is not needed 1811 since the local Schur complements are already SPD 1812 */ 1813 has_null_pressures = PETSC_TRUE; 1814 have_null = PETSC_TRUE; 1815 if (pcbddc->n_ISForDofsLocal) { 1816 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1817 1818 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1819 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1820 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1821 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1822 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1823 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1824 if (!sorted) { 1825 ierr = ISSort(pressures);CHKERRQ(ierr); 1826 } 1827 } else { 1828 pressures = NULL; 1829 } 1830 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1831 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1832 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1833 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1834 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1835 if (!sorted) { 1836 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1837 } 1838 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1839 if (!nz) { 1840 if (n) have_null = PETSC_FALSE; 1841 has_null_pressures = PETSC_FALSE; 1842 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1843 } 1844 recompute_zerodiag = PETSC_FALSE; 1845 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1846 zerodiag_subs = NULL; 1847 pcbddc->benign_n = 0; 1848 n_interior_dofs = 0; 1849 interior_dofs = NULL; 1850 if (pcbddc->current_level) { /* need to compute interior nodes */ 1851 PetscInt n,i,j; 1852 PetscInt n_neigh,*neigh,*n_shared,**shared; 1853 PetscInt *iwork; 1854 1855 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1856 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1857 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1858 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1859 for (i=1;i<n_neigh;i++) 1860 for (j=0;j<n_shared[i];j++) 1861 iwork[shared[i][j]] += 1; 1862 for (i=0;i<n;i++) 1863 if (!iwork[i]) 1864 interior_dofs[n_interior_dofs++] = i; 1865 ierr = PetscFree(iwork);CHKERRQ(ierr); 1866 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1867 } 1868 if (has_null_pressures) { 1869 IS *subs; 1870 PetscInt nsubs,i,j,nl; 1871 const PetscInt *idxs; 1872 PetscScalar *array; 1873 Vec *work; 1874 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1875 1876 subs = pcbddc->local_subs; 1877 nsubs = pcbddc->n_local_subs; 1878 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1879 if (pcbddc->current_level) { 1880 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1881 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1882 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1883 /* work[0] = 1_p */ 1884 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1885 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1886 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1887 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1888 /* work[0] = 1_v */ 1889 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1890 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1891 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1892 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1893 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1894 } 1895 if (nsubs > 1) { 1896 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1897 for (i=0;i<nsubs;i++) { 1898 ISLocalToGlobalMapping l2g; 1899 IS t_zerodiag_subs; 1900 PetscInt nl; 1901 1902 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1903 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1904 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1905 if (nl) { 1906 PetscBool valid = PETSC_TRUE; 1907 1908 if (pcbddc->current_level) { 1909 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1910 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1911 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1912 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1913 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1914 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1915 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1916 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1917 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1918 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1919 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1920 for (j=0;j<n_interior_dofs;j++) { 1921 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1922 valid = PETSC_FALSE; 1923 break; 1924 } 1925 } 1926 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1927 } 1928 if (valid && pcbddc->NeumannBoundariesLocal) { 1929 IS t_bc; 1930 PetscInt nzb; 1931 1932 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1933 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1934 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1935 if (nzb) valid = PETSC_FALSE; 1936 } 1937 if (valid && pressures) { 1938 IS t_pressure_subs; 1939 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1940 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1941 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1942 } 1943 if (valid) { 1944 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1945 pcbddc->benign_n++; 1946 } else { 1947 recompute_zerodiag = PETSC_TRUE; 1948 } 1949 } 1950 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1951 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1952 } 1953 } else { /* there's just one subdomain (or zero if they have not been detected */ 1954 PetscBool valid = PETSC_TRUE; 1955 1956 if (pcbddc->NeumannBoundariesLocal) { 1957 PetscInt nzb; 1958 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1959 if (nzb) valid = PETSC_FALSE; 1960 } 1961 if (valid && pressures) { 1962 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1963 } 1964 if (valid && pcbddc->current_level) { 1965 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1966 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1967 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1968 for (j=0;j<n_interior_dofs;j++) { 1969 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1970 valid = PETSC_FALSE; 1971 break; 1972 } 1973 } 1974 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1975 } 1976 if (valid) { 1977 pcbddc->benign_n = 1; 1978 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1979 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1980 zerodiag_subs[0] = zerodiag; 1981 } 1982 } 1983 if (pcbddc->current_level) { 1984 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1985 } 1986 } 1987 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1988 1989 if (!pcbddc->benign_n) { 1990 PetscInt n; 1991 1992 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1993 recompute_zerodiag = PETSC_FALSE; 1994 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1995 if (n) { 1996 has_null_pressures = PETSC_FALSE; 1997 have_null = PETSC_FALSE; 1998 } 1999 } 2000 2001 /* final check for null pressures */ 2002 if (zerodiag && pressures) { 2003 PetscInt nz,np; 2004 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2005 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2006 if (nz != np) have_null = PETSC_FALSE; 2007 } 2008 2009 if (recompute_zerodiag) { 2010 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2011 if (pcbddc->benign_n == 1) { 2012 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2013 zerodiag = zerodiag_subs[0]; 2014 } else { 2015 PetscInt i,nzn,*new_idxs; 2016 2017 nzn = 0; 2018 for (i=0;i<pcbddc->benign_n;i++) { 2019 PetscInt ns; 2020 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2021 nzn += ns; 2022 } 2023 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2024 nzn = 0; 2025 for (i=0;i<pcbddc->benign_n;i++) { 2026 PetscInt ns,*idxs; 2027 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2028 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2029 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2030 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2031 nzn += ns; 2032 } 2033 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2034 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2035 } 2036 have_null = PETSC_FALSE; 2037 } 2038 2039 /* Prepare matrix to compute no-net-flux */ 2040 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2041 Mat A,loc_divudotp; 2042 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2043 IS row,col,isused = NULL; 2044 PetscInt M,N,n,st,n_isused; 2045 2046 if (pressures) { 2047 isused = pressures; 2048 } else { 2049 isused = zerodiag; 2050 } 2051 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2052 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2053 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2054 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2055 n_isused = 0; 2056 if (isused) { 2057 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2058 } 2059 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2060 st = st-n_isused; 2061 if (n) { 2062 const PetscInt *gidxs; 2063 2064 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2065 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2066 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2067 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2068 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2069 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2070 } else { 2071 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2072 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2073 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2074 } 2075 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2076 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2077 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2078 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2079 ierr = ISDestroy(&row);CHKERRQ(ierr); 2080 ierr = ISDestroy(&col);CHKERRQ(ierr); 2081 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2082 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2083 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2084 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2085 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2086 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2087 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2088 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2089 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2091 } 2092 2093 /* change of basis and p0 dofs */ 2094 if (has_null_pressures) { 2095 IS zerodiagc; 2096 const PetscInt *idxs,*idxsc; 2097 PetscInt i,s,*nnz; 2098 2099 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2100 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2101 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2102 /* local change of basis for pressures */ 2103 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2104 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2105 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2106 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2107 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2108 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2109 for (i=0;i<pcbddc->benign_n;i++) { 2110 PetscInt nzs,j; 2111 2112 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2113 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2114 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2115 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2116 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2117 } 2118 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2119 ierr = PetscFree(nnz);CHKERRQ(ierr); 2120 /* set identity on velocities */ 2121 for (i=0;i<n-nz;i++) { 2122 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2123 } 2124 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2125 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2126 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2127 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2128 /* set change on pressures */ 2129 for (s=0;s<pcbddc->benign_n;s++) { 2130 PetscScalar *array; 2131 PetscInt nzs; 2132 2133 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2134 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2135 for (i=0;i<nzs-1;i++) { 2136 PetscScalar vals[2]; 2137 PetscInt cols[2]; 2138 2139 cols[0] = idxs[i]; 2140 cols[1] = idxs[nzs-1]; 2141 vals[0] = 1.; 2142 vals[1] = 1.; 2143 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2144 } 2145 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2146 for (i=0;i<nzs-1;i++) array[i] = -1.; 2147 array[nzs-1] = 1.; 2148 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2149 /* store local idxs for p0 */ 2150 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2151 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2152 ierr = PetscFree(array);CHKERRQ(ierr); 2153 } 2154 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2155 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 /* project if needed */ 2157 if (pcbddc->benign_change_explicit) { 2158 Mat M; 2159 2160 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2161 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2162 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2163 ierr = MatDestroy(&M);CHKERRQ(ierr); 2164 } 2165 /* store global idxs for p0 */ 2166 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2167 } 2168 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2169 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2170 2171 /* determines if the coarse solver will be singular or not */ 2172 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2173 /* determines if the problem has subdomains with 0 pressure block */ 2174 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2175 *zerodiaglocal = zerodiag; 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2181 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2182 { 2183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2184 PetscScalar *array; 2185 PetscErrorCode ierr; 2186 2187 PetscFunctionBegin; 2188 if (!pcbddc->benign_sf) { 2189 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2190 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2191 } 2192 if (get) { 2193 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2194 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2195 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2196 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2197 } else { 2198 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2199 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2200 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2201 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2202 } 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2208 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2209 { 2210 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2211 PetscErrorCode ierr; 2212 2213 PetscFunctionBegin; 2214 /* TODO: add error checking 2215 - avoid nested pop (or push) calls. 2216 - cannot push before pop. 2217 - cannot call this if pcbddc->local_mat is NULL 2218 */ 2219 if (!pcbddc->benign_n) { 2220 PetscFunctionReturn(0); 2221 } 2222 if (pop) { 2223 if (pcbddc->benign_change_explicit) { 2224 IS is_p0; 2225 MatReuse reuse; 2226 2227 /* extract B_0 */ 2228 reuse = MAT_INITIAL_MATRIX; 2229 if (pcbddc->benign_B0) { 2230 reuse = MAT_REUSE_MATRIX; 2231 } 2232 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2233 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2234 /* remove rows and cols from local problem */ 2235 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2236 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2237 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2238 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2239 } else { 2240 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2241 PetscScalar *vals; 2242 PetscInt i,n,*idxs_ins; 2243 2244 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2245 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2246 if (!pcbddc->benign_B0) { 2247 PetscInt *nnz; 2248 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2249 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2250 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2251 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2252 for (i=0;i<pcbddc->benign_n;i++) { 2253 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2254 nnz[i] = n - nnz[i]; 2255 } 2256 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz);CHKERRQ(ierr); 2258 } 2259 2260 for (i=0;i<pcbddc->benign_n;i++) { 2261 PetscScalar *array; 2262 PetscInt *idxs,j,nz,cum; 2263 2264 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2265 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2266 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2267 for (j=0;j<nz;j++) vals[j] = 1.; 2268 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2269 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2270 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2271 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2272 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2273 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2274 cum = 0; 2275 for (j=0;j<n;j++) { 2276 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2277 vals[cum] = array[j]; 2278 idxs_ins[cum] = j; 2279 cum++; 2280 } 2281 } 2282 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2283 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2284 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2285 } 2286 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2287 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2288 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2289 } 2290 } else { /* push */ 2291 if (pcbddc->benign_change_explicit) { 2292 PetscInt i; 2293 2294 for (i=0;i<pcbddc->benign_n;i++) { 2295 PetscScalar *B0_vals; 2296 PetscInt *B0_cols,B0_ncol; 2297 2298 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2299 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2300 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2301 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2302 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2303 } 2304 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 } else { 2307 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2308 } 2309 } 2310 PetscFunctionReturn(0); 2311 } 2312 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2315 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2316 { 2317 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2318 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2319 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2320 PetscBLASInt *B_iwork,*B_ifail; 2321 PetscScalar *work,lwork; 2322 PetscScalar *St,*S,*eigv; 2323 PetscScalar *Sarray,*Starray; 2324 PetscReal *eigs,thresh; 2325 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2326 PetscBool allocated_S_St; 2327 #if defined(PETSC_USE_COMPLEX) 2328 PetscReal *rwork; 2329 #endif 2330 PetscErrorCode ierr; 2331 2332 PetscFunctionBegin; 2333 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2334 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2335 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2336 2337 if (pcbddc->dbg_flag) { 2338 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2339 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2340 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2341 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2342 } 2343 2344 if (pcbddc->dbg_flag) { 2345 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2346 } 2347 2348 /* max size of subsets */ 2349 mss = 0; 2350 for (i=0;i<sub_schurs->n_subs;i++) { 2351 PetscInt subset_size; 2352 2353 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2354 mss = PetscMax(mss,subset_size); 2355 } 2356 2357 /* min/max and threshold */ 2358 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2359 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2360 nmax = PetscMax(nmin,nmax); 2361 allocated_S_St = PETSC_FALSE; 2362 if (nmin) { 2363 allocated_S_St = PETSC_TRUE; 2364 } 2365 2366 /* allocate lapack workspace */ 2367 cum = cum2 = 0; 2368 maxneigs = 0; 2369 for (i=0;i<sub_schurs->n_subs;i++) { 2370 PetscInt n,subset_size; 2371 2372 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2373 n = PetscMin(subset_size,nmax); 2374 cum += subset_size; 2375 cum2 += subset_size*n; 2376 maxneigs = PetscMax(maxneigs,n); 2377 } 2378 if (mss) { 2379 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2380 PetscBLASInt B_itype = 1; 2381 PetscBLASInt B_N = mss; 2382 PetscReal zero = 0.0; 2383 PetscReal eps = 0.0; /* dlamch? */ 2384 2385 B_lwork = -1; 2386 S = NULL; 2387 St = NULL; 2388 eigs = NULL; 2389 eigv = NULL; 2390 B_iwork = NULL; 2391 B_ifail = NULL; 2392 #if defined(PETSC_USE_COMPLEX) 2393 rwork = NULL; 2394 #endif 2395 thresh = 1.0; 2396 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2397 #if defined(PETSC_USE_COMPLEX) 2398 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2399 #else 2400 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2401 #endif 2402 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2403 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2404 } else { 2405 /* TODO */ 2406 } 2407 } else { 2408 lwork = 0; 2409 } 2410 2411 nv = 0; 2412 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2413 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2414 } 2415 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2416 if (allocated_S_St) { 2417 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2418 } 2419 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2420 #if defined(PETSC_USE_COMPLEX) 2421 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2422 #endif 2423 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2424 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2425 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2426 nv+cum,&pcbddc->adaptive_constraints_idxs, 2427 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2428 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2429 2430 maxneigs = 0; 2431 cum = cumarray = 0; 2432 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2433 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2434 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2435 const PetscInt *idxs; 2436 2437 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2438 for (cum=0;cum<nv;cum++) { 2439 pcbddc->adaptive_constraints_n[cum] = 1; 2440 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2441 pcbddc->adaptive_constraints_data[cum] = 1.0; 2442 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2443 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2444 } 2445 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2446 } 2447 2448 if (mss) { /* multilevel */ 2449 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2450 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2451 } 2452 2453 thresh = pcbddc->adaptive_threshold; 2454 for (i=0;i<sub_schurs->n_subs;i++) { 2455 const PetscInt *idxs; 2456 PetscReal upper,lower; 2457 PetscInt j,subset_size,eigs_start = 0; 2458 PetscBLASInt B_N; 2459 PetscBool same_data = PETSC_FALSE; 2460 2461 if (pcbddc->use_deluxe_scaling) { 2462 upper = PETSC_MAX_REAL; 2463 lower = thresh; 2464 } else { 2465 upper = 1./thresh; 2466 lower = 0.; 2467 } 2468 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2469 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2470 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2471 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2472 if (sub_schurs->is_hermitian) { 2473 PetscInt j,k; 2474 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2475 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2476 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2477 } 2478 for (j=0;j<subset_size;j++) { 2479 for (k=j;k<subset_size;k++) { 2480 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2481 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2482 } 2483 } 2484 } else { 2485 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2486 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2487 } 2488 } else { 2489 S = Sarray + cumarray; 2490 St = Starray + cumarray; 2491 } 2492 /* see if we can save some work */ 2493 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2494 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2495 } 2496 2497 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2498 B_neigs = 0; 2499 } else { 2500 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2501 PetscBLASInt B_itype = 1; 2502 PetscBLASInt B_IL, B_IU; 2503 PetscReal eps = -1.0; /* dlamch? */ 2504 PetscInt nmin_s; 2505 PetscBool compute_range = PETSC_FALSE; 2506 2507 if (pcbddc->dbg_flag) { 2508 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2509 } 2510 2511 compute_range = PETSC_FALSE; 2512 if (thresh > 1.+PETSC_SMALL && !same_data) { 2513 compute_range = PETSC_TRUE; 2514 } 2515 2516 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2517 if (compute_range) { 2518 2519 /* ask for eigenvalues larger than thresh */ 2520 #if defined(PETSC_USE_COMPLEX) 2521 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2522 #else 2523 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2524 #endif 2525 } else if (!same_data) { 2526 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2527 B_IL = 1; 2528 #if defined(PETSC_USE_COMPLEX) 2529 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2530 #else 2531 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2532 #endif 2533 } else { /* same_data is true, so get the adaptive function requested by the user */ 2534 PetscInt k; 2535 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2536 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2538 nmin = nmax; 2539 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2540 for (k=0;k<nmax;k++) { 2541 eigs[k] = 1./PETSC_SMALL; 2542 eigv[k*(subset_size+1)] = 1.0; 2543 } 2544 } 2545 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2546 if (B_ierr) { 2547 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2548 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2549 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2550 } 2551 2552 if (B_neigs > nmax) { 2553 if (pcbddc->dbg_flag) { 2554 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2555 } 2556 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2557 B_neigs = nmax; 2558 } 2559 2560 nmin_s = PetscMin(nmin,B_N); 2561 if (B_neigs < nmin_s) { 2562 PetscBLASInt B_neigs2; 2563 2564 if (pcbddc->use_deluxe_scaling) { 2565 B_IL = B_N - nmin_s + 1; 2566 B_IU = B_N - B_neigs; 2567 } else { 2568 B_IL = B_neigs + 1; 2569 B_IU = nmin_s; 2570 } 2571 if (pcbddc->dbg_flag) { 2572 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2573 } 2574 if (sub_schurs->is_hermitian) { 2575 PetscInt j,k; 2576 for (j=0;j<subset_size;j++) { 2577 for (k=j;k<subset_size;k++) { 2578 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2579 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2580 } 2581 } 2582 } else { 2583 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2584 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2585 } 2586 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2587 #if defined(PETSC_USE_COMPLEX) 2588 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2589 #else 2590 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2591 #endif 2592 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2593 B_neigs += B_neigs2; 2594 } 2595 if (B_ierr) { 2596 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2597 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2598 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2599 } 2600 if (pcbddc->dbg_flag) { 2601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2602 for (j=0;j<B_neigs;j++) { 2603 if (eigs[j] == 0.0) { 2604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2605 } else { 2606 if (pcbddc->use_deluxe_scaling) { 2607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2608 } else { 2609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2610 } 2611 } 2612 } 2613 } 2614 } else { 2615 /* TODO */ 2616 } 2617 } 2618 /* change the basis back to the original one */ 2619 if (sub_schurs->change) { 2620 Mat change,phi,phit; 2621 2622 if (pcbddc->dbg_flag > 1) { 2623 PetscInt ii; 2624 for (ii=0;ii<B_neigs;ii++) { 2625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2626 for (j=0;j<B_N;j++) { 2627 #if defined(PETSC_USE_COMPLEX) 2628 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2629 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2631 #else 2632 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2633 #endif 2634 } 2635 } 2636 } 2637 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2638 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2639 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2640 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2641 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2642 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2643 } 2644 maxneigs = PetscMax(B_neigs,maxneigs); 2645 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2646 if (B_neigs) { 2647 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2648 2649 if (pcbddc->dbg_flag > 1) { 2650 PetscInt ii; 2651 for (ii=0;ii<B_neigs;ii++) { 2652 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2653 for (j=0;j<B_N;j++) { 2654 #if defined(PETSC_USE_COMPLEX) 2655 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2656 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2657 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2658 #else 2659 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2660 #endif 2661 } 2662 } 2663 } 2664 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2665 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2666 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2667 cum++; 2668 } 2669 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2670 /* shift for next computation */ 2671 cumarray += subset_size*subset_size; 2672 } 2673 if (pcbddc->dbg_flag) { 2674 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2675 } 2676 2677 if (mss) { 2678 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2679 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2680 /* destroy matrices (junk) */ 2681 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2682 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2683 } 2684 if (allocated_S_St) { 2685 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2686 } 2687 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2688 #if defined(PETSC_USE_COMPLEX) 2689 ierr = PetscFree(rwork);CHKERRQ(ierr); 2690 #endif 2691 if (pcbddc->dbg_flag) { 2692 PetscInt maxneigs_r; 2693 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2694 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2695 } 2696 PetscFunctionReturn(0); 2697 } 2698 2699 #undef __FUNCT__ 2700 #define __FUNCT__ "PCBDDCSetUpSolvers" 2701 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2702 { 2703 PetscScalar *coarse_submat_vals; 2704 PetscErrorCode ierr; 2705 2706 PetscFunctionBegin; 2707 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2708 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2709 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2710 2711 /* Setup local neumann solver ksp_R */ 2712 /* PCBDDCSetUpLocalScatters should be called first! */ 2713 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2714 2715 /* 2716 Setup local correction and local part of coarse basis. 2717 Gives back the dense local part of the coarse matrix in column major ordering 2718 */ 2719 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2720 2721 /* Compute total number of coarse nodes and setup coarse solver */ 2722 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2723 2724 /* free */ 2725 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2726 PetscFunctionReturn(0); 2727 } 2728 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "PCBDDCResetCustomization" 2731 PetscErrorCode PCBDDCResetCustomization(PC pc) 2732 { 2733 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2734 PetscErrorCode ierr; 2735 2736 PetscFunctionBegin; 2737 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2738 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2739 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2740 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2741 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2742 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2743 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2744 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2745 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2746 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 #undef __FUNCT__ 2751 #define __FUNCT__ "PCBDDCResetTopography" 2752 PetscErrorCode PCBDDCResetTopography(PC pc) 2753 { 2754 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2755 PetscInt i; 2756 PetscErrorCode ierr; 2757 2758 PetscFunctionBegin; 2759 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2760 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2761 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2762 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2763 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2764 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2765 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2766 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2767 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2768 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2769 for (i=0;i<pcbddc->n_local_subs;i++) { 2770 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2771 } 2772 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2773 if (pcbddc->sub_schurs) { 2774 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2775 } 2776 pcbddc->graphanalyzed = PETSC_FALSE; 2777 pcbddc->recompute_topography = PETSC_TRUE; 2778 PetscFunctionReturn(0); 2779 } 2780 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "PCBDDCResetSolvers" 2783 PetscErrorCode PCBDDCResetSolvers(PC pc) 2784 { 2785 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2786 PetscErrorCode ierr; 2787 2788 PetscFunctionBegin; 2789 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2790 if (pcbddc->coarse_phi_B) { 2791 PetscScalar *array; 2792 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2793 ierr = PetscFree(array);CHKERRQ(ierr); 2794 } 2795 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2796 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2797 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2798 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2799 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2800 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2801 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2802 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2803 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2804 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2805 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2806 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2807 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2808 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2809 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2810 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2811 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2812 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2813 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2814 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2815 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2816 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2817 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2818 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2819 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2820 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2821 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2822 if (pcbddc->benign_zerodiag_subs) { 2823 PetscInt i; 2824 for (i=0;i<pcbddc->benign_n;i++) { 2825 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2826 } 2827 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2828 } 2829 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2830 PetscFunctionReturn(0); 2831 } 2832 2833 #undef __FUNCT__ 2834 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2835 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2836 { 2837 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2838 PC_IS *pcis = (PC_IS*)pc->data; 2839 VecType impVecType; 2840 PetscInt n_constraints,n_R,old_size; 2841 PetscErrorCode ierr; 2842 2843 PetscFunctionBegin; 2844 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2845 /* get sizes */ 2846 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2847 n_R = pcis->n - pcbddc->n_vertices; 2848 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2849 /* local work vectors (try to avoid unneeded work)*/ 2850 /* R nodes */ 2851 old_size = -1; 2852 if (pcbddc->vec1_R) { 2853 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2854 } 2855 if (n_R != old_size) { 2856 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2857 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2858 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2859 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2860 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2861 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2862 } 2863 /* local primal dofs */ 2864 old_size = -1; 2865 if (pcbddc->vec1_P) { 2866 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2867 } 2868 if (pcbddc->local_primal_size != old_size) { 2869 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2870 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2871 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2872 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2873 } 2874 /* local explicit constraints */ 2875 old_size = -1; 2876 if (pcbddc->vec1_C) { 2877 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2878 } 2879 if (n_constraints && n_constraints != old_size) { 2880 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2881 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2882 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2883 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2884 } 2885 PetscFunctionReturn(0); 2886 } 2887 2888 #undef __FUNCT__ 2889 #define __FUNCT__ "PCBDDCSetUpCorrection" 2890 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2891 { 2892 PetscErrorCode ierr; 2893 /* pointers to pcis and pcbddc */ 2894 PC_IS* pcis = (PC_IS*)pc->data; 2895 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2896 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2897 /* submatrices of local problem */ 2898 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2899 /* submatrices of local coarse problem */ 2900 Mat S_VV,S_CV,S_VC,S_CC; 2901 /* working matrices */ 2902 Mat C_CR; 2903 /* additional working stuff */ 2904 PC pc_R; 2905 Mat F; 2906 Vec dummy_vec; 2907 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2908 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2909 PetscScalar *work; 2910 PetscInt *idx_V_B; 2911 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2912 PetscInt i,n_R,n_D,n_B; 2913 2914 /* some shortcuts to scalars */ 2915 PetscScalar one=1.0,m_one=-1.0; 2916 2917 PetscFunctionBegin; 2918 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2919 2920 /* Set Non-overlapping dimensions */ 2921 n_vertices = pcbddc->n_vertices; 2922 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2923 n_B = pcis->n_B; 2924 n_D = pcis->n - n_B; 2925 n_R = pcis->n - n_vertices; 2926 2927 /* vertices in boundary numbering */ 2928 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2929 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2930 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2931 2932 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2933 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2935 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2936 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2937 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2939 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2940 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2941 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2942 2943 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2944 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2945 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2946 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2947 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2948 lda_rhs = n_R; 2949 need_benign_correction = PETSC_FALSE; 2950 if (isLU || isILU || isCHOL) { 2951 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2952 } else if (sub_schurs && sub_schurs->reuse_solver) { 2953 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2954 MatFactorType type; 2955 2956 F = reuse_solver->F; 2957 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2958 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2959 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2960 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2961 } else { 2962 F = NULL; 2963 } 2964 2965 /* allocate workspace */ 2966 n = 0; 2967 if (n_constraints) { 2968 n += lda_rhs*n_constraints; 2969 } 2970 if (n_vertices) { 2971 n = PetscMax(2*lda_rhs*n_vertices,n); 2972 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2973 } 2974 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2975 2976 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2977 dummy_vec = NULL; 2978 if (need_benign_correction && lda_rhs != n_R && F) { 2979 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2980 } 2981 2982 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2983 if (n_constraints) { 2984 Mat M1,M2,M3,C_B; 2985 IS is_aux; 2986 PetscScalar *array,*array2; 2987 2988 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2989 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2990 2991 /* Extract constraints on R nodes: C_{CR} */ 2992 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2993 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2994 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2995 2996 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2997 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2998 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2999 for (i=0;i<n_constraints;i++) { 3000 const PetscScalar *row_cmat_values; 3001 const PetscInt *row_cmat_indices; 3002 PetscInt size_of_constraint,j; 3003 3004 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3005 for (j=0;j<size_of_constraint;j++) { 3006 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3007 } 3008 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3009 } 3010 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3011 if (F) { 3012 Mat B; 3013 3014 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3015 if (need_benign_correction) { 3016 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3017 3018 /* rhs is already zero on interior dofs, no need to change the rhs */ 3019 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3020 } 3021 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3022 if (need_benign_correction) { 3023 PetscScalar *marr; 3024 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3025 3026 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3027 if (lda_rhs != n_R) { 3028 for (i=0;i<n_constraints;i++) { 3029 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3030 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3031 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3032 } 3033 } else { 3034 for (i=0;i<n_constraints;i++) { 3035 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3036 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3037 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3038 } 3039 } 3040 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3041 } 3042 ierr = MatDestroy(&B);CHKERRQ(ierr); 3043 } else { 3044 PetscScalar *marr; 3045 3046 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3047 for (i=0;i<n_constraints;i++) { 3048 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3049 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3050 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3051 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3052 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3053 } 3054 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3055 } 3056 if (!pcbddc->switch_static) { 3057 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3058 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3059 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3060 for (i=0;i<n_constraints;i++) { 3061 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3062 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3063 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3064 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3065 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3066 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3067 } 3068 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3069 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3070 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3071 } else { 3072 if (lda_rhs != n_R) { 3073 IS dummy; 3074 3075 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3076 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3077 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3078 } else { 3079 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3080 pcbddc->local_auxmat2 = local_auxmat2_R; 3081 } 3082 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3083 } 3084 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3085 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3086 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3087 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3088 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3089 if (isCHOL) { 3090 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3091 } else { 3092 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3093 } 3094 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3095 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3096 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3097 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3098 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3099 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3100 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3101 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3102 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3103 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3104 } 3105 3106 /* Get submatrices from subdomain matrix */ 3107 if (n_vertices) { 3108 IS is_aux; 3109 3110 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3111 IS tis; 3112 3113 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3114 ierr = ISSort(tis);CHKERRQ(ierr); 3115 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3116 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3117 } else { 3118 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3119 } 3120 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3121 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3122 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3123 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3124 } 3125 3126 /* Matrix of coarse basis functions (local) */ 3127 if (pcbddc->coarse_phi_B) { 3128 PetscInt on_B,on_primal,on_D=n_D; 3129 if (pcbddc->coarse_phi_D) { 3130 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3131 } 3132 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3133 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3134 PetscScalar *marray; 3135 3136 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3137 ierr = PetscFree(marray);CHKERRQ(ierr); 3138 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3139 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3140 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3141 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3142 } 3143 } 3144 3145 if (!pcbddc->coarse_phi_B) { 3146 PetscScalar *marray; 3147 3148 n = n_B*pcbddc->local_primal_size; 3149 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3150 n += n_D*pcbddc->local_primal_size; 3151 } 3152 if (!pcbddc->symmetric_primal) { 3153 n *= 2; 3154 } 3155 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3156 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3157 n = n_B*pcbddc->local_primal_size; 3158 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3159 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3160 n += n_D*pcbddc->local_primal_size; 3161 } 3162 if (!pcbddc->symmetric_primal) { 3163 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3164 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3165 n = n_B*pcbddc->local_primal_size; 3166 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3167 } 3168 } else { 3169 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3170 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3171 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3172 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3173 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3174 } 3175 } 3176 } 3177 3178 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3179 p0_lidx_I = NULL; 3180 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3181 const PetscInt *idxs; 3182 3183 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3184 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3185 for (i=0;i<pcbddc->benign_n;i++) { 3186 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3187 } 3188 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3189 } 3190 3191 /* vertices */ 3192 if (n_vertices) { 3193 3194 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3195 3196 if (n_R) { 3197 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3198 PetscBLASInt B_N,B_one = 1; 3199 PetscScalar *x,*y; 3200 PetscBool isseqaij; 3201 3202 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3203 if (need_benign_correction) { 3204 ISLocalToGlobalMapping RtoN; 3205 IS is_p0; 3206 PetscInt *idxs_p0,n; 3207 3208 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3209 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3210 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3211 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3212 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3213 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3214 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3215 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3216 } 3217 3218 if (lda_rhs == n_R) { 3219 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3220 } else { 3221 PetscScalar *av,*array; 3222 const PetscInt *xadj,*adjncy; 3223 PetscInt n; 3224 PetscBool flg_row; 3225 3226 array = work+lda_rhs*n_vertices; 3227 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3228 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3229 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3230 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3231 for (i=0;i<n;i++) { 3232 PetscInt j; 3233 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3234 } 3235 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3236 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3237 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3238 } 3239 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3240 if (need_benign_correction) { 3241 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3242 PetscScalar *marr; 3243 3244 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3245 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3246 3247 | 0 0 0 | (V) 3248 L = | 0 0 -1 | (P-p0) 3249 | 0 0 -1 | (p0) 3250 3251 */ 3252 for (i=0;i<reuse_solver->benign_n;i++) { 3253 const PetscScalar *vals; 3254 const PetscInt *idxs,*idxs_zero; 3255 PetscInt n,j,nz; 3256 3257 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3258 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3259 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3260 for (j=0;j<n;j++) { 3261 PetscScalar val = vals[j]; 3262 PetscInt k,col = idxs[j]; 3263 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3264 } 3265 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3266 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3267 } 3268 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3269 } 3270 if (F) { 3271 /* need to correct the rhs */ 3272 if (need_benign_correction) { 3273 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3274 PetscScalar *marr; 3275 3276 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3277 if (lda_rhs != n_R) { 3278 for (i=0;i<n_vertices;i++) { 3279 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3280 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3281 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3282 } 3283 } else { 3284 for (i=0;i<n_vertices;i++) { 3285 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3286 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3287 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3288 } 3289 } 3290 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3291 } 3292 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3293 /* need to correct the solution */ 3294 if (need_benign_correction) { 3295 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3296 PetscScalar *marr; 3297 3298 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3299 if (lda_rhs != n_R) { 3300 for (i=0;i<n_vertices;i++) { 3301 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3302 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3303 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3304 } 3305 } else { 3306 for (i=0;i<n_vertices;i++) { 3307 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3308 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3309 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3310 } 3311 } 3312 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3313 } 3314 } else { 3315 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3316 for (i=0;i<n_vertices;i++) { 3317 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3318 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3319 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3320 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3321 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3322 } 3323 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3324 } 3325 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3326 /* S_VV and S_CV */ 3327 if (n_constraints) { 3328 Mat B; 3329 3330 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3331 for (i=0;i<n_vertices;i++) { 3332 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3333 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3334 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3335 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3336 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3337 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3338 } 3339 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3340 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3341 ierr = MatDestroy(&B);CHKERRQ(ierr); 3342 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3343 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3344 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3345 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3346 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3347 ierr = MatDestroy(&B);CHKERRQ(ierr); 3348 } 3349 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3350 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3351 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3352 } 3353 if (lda_rhs != n_R) { 3354 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3355 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3356 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3357 } 3358 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3359 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3360 if (need_benign_correction) { 3361 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3362 PetscScalar *marr,*sums; 3363 3364 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3365 ierr = MatDenseGetArray(S_VVt,&marr); 3366 for (i=0;i<reuse_solver->benign_n;i++) { 3367 const PetscScalar *vals; 3368 const PetscInt *idxs,*idxs_zero; 3369 PetscInt n,j,nz; 3370 3371 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3372 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3373 for (j=0;j<n_vertices;j++) { 3374 PetscInt k; 3375 sums[j] = 0.; 3376 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3377 } 3378 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3379 for (j=0;j<n;j++) { 3380 PetscScalar val = vals[j]; 3381 PetscInt k; 3382 for (k=0;k<n_vertices;k++) { 3383 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3384 } 3385 } 3386 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3387 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3388 } 3389 ierr = PetscFree(sums);CHKERRQ(ierr); 3390 ierr = MatDenseRestoreArray(S_VVt,&marr); 3391 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3392 } 3393 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3394 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3395 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3396 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3397 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3398 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3399 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3400 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3401 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3402 } else { 3403 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3404 } 3405 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3406 3407 /* coarse basis functions */ 3408 for (i=0;i<n_vertices;i++) { 3409 PetscScalar *y; 3410 3411 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3412 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3413 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3414 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3415 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3416 y[n_B*i+idx_V_B[i]] = 1.0; 3417 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3418 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3419 3420 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3421 PetscInt j; 3422 3423 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3424 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3425 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3426 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3427 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3428 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3429 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3430 } 3431 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3432 } 3433 /* if n_R == 0 the object is not destroyed */ 3434 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3435 } 3436 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3437 3438 if (n_constraints) { 3439 Mat B; 3440 3441 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3442 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3443 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3444 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3445 if (n_vertices) { 3446 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3447 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3448 } else { 3449 Mat S_VCt; 3450 3451 if (lda_rhs != n_R) { 3452 ierr = MatDestroy(&B);CHKERRQ(ierr); 3453 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3454 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3455 } 3456 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3457 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3458 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3459 } 3460 } 3461 ierr = MatDestroy(&B);CHKERRQ(ierr); 3462 /* coarse basis functions */ 3463 for (i=0;i<n_constraints;i++) { 3464 PetscScalar *y; 3465 3466 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3467 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3468 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3469 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3470 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3471 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3472 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3473 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3474 PetscInt j; 3475 3476 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3477 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3478 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3479 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3480 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3481 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3482 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3483 } 3484 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3485 } 3486 } 3487 if (n_constraints) { 3488 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3489 } 3490 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3491 3492 /* coarse matrix entries relative to B_0 */ 3493 if (pcbddc->benign_n) { 3494 Mat B0_B,B0_BPHI; 3495 IS is_dummy; 3496 PetscScalar *data; 3497 PetscInt j; 3498 3499 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3500 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3501 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3502 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3503 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3504 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3505 for (j=0;j<pcbddc->benign_n;j++) { 3506 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3507 for (i=0;i<pcbddc->local_primal_size;i++) { 3508 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3509 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3510 } 3511 } 3512 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3513 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3514 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3515 } 3516 3517 /* compute other basis functions for non-symmetric problems */ 3518 if (!pcbddc->symmetric_primal) { 3519 Mat B_V=NULL,B_C=NULL; 3520 PetscScalar *marray; 3521 3522 if (n_constraints) { 3523 Mat S_CCT,C_CRT; 3524 3525 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3526 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3527 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3528 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3529 if (n_vertices) { 3530 Mat S_VCT; 3531 3532 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3533 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3534 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3535 } 3536 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3537 } else { 3538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3539 } 3540 if (n_vertices && n_R) { 3541 PetscScalar *av,*marray; 3542 const PetscInt *xadj,*adjncy; 3543 PetscInt n; 3544 PetscBool flg_row; 3545 3546 /* B_V = B_V - A_VR^T */ 3547 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3548 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3549 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3550 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3551 for (i=0;i<n;i++) { 3552 PetscInt j; 3553 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3554 } 3555 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3556 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3557 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3558 } 3559 3560 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3561 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3562 for (i=0;i<n_vertices;i++) { 3563 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3564 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3565 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3566 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3567 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3568 } 3569 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3570 if (B_C) { 3571 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3572 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3573 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3574 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3575 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3576 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3577 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3578 } 3579 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3580 } 3581 /* coarse basis functions */ 3582 for (i=0;i<pcbddc->local_primal_size;i++) { 3583 PetscScalar *y; 3584 3585 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3586 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3587 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3588 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3589 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3590 if (i<n_vertices) { 3591 y[n_B*i+idx_V_B[i]] = 1.0; 3592 } 3593 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3594 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3595 3596 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3597 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3598 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3599 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3600 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3601 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3602 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3603 } 3604 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3605 } 3606 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3607 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3608 } 3609 /* free memory */ 3610 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3611 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3612 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3613 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3614 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3615 ierr = PetscFree(work);CHKERRQ(ierr); 3616 if (n_vertices) { 3617 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3618 } 3619 if (n_constraints) { 3620 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3621 } 3622 /* Checking coarse_sub_mat and coarse basis functios */ 3623 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3624 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3625 if (pcbddc->dbg_flag) { 3626 Mat coarse_sub_mat; 3627 Mat AUXMAT,TM1,TM2,TM3,TM4; 3628 Mat coarse_phi_D,coarse_phi_B; 3629 Mat coarse_psi_D,coarse_psi_B; 3630 Mat A_II,A_BB,A_IB,A_BI; 3631 Mat C_B,CPHI; 3632 IS is_dummy; 3633 Vec mones; 3634 MatType checkmattype=MATSEQAIJ; 3635 PetscReal real_value; 3636 3637 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3638 Mat A; 3639 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3640 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3641 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3642 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3643 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3644 ierr = MatDestroy(&A);CHKERRQ(ierr); 3645 } else { 3646 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3647 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3648 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3649 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3650 } 3651 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3652 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3653 if (!pcbddc->symmetric_primal) { 3654 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3655 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3656 } 3657 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3658 3659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3660 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3661 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3662 if (!pcbddc->symmetric_primal) { 3663 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3664 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3665 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3666 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3667 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3668 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3669 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3670 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3672 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3673 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3674 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3675 } else { 3676 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3677 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3678 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3679 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3680 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3681 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3682 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3683 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3684 } 3685 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3686 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3687 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3688 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3689 if (pcbddc->benign_n) { 3690 Mat B0_B,B0_BPHI; 3691 PetscScalar *data,*data2; 3692 PetscInt j; 3693 3694 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3695 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3696 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3697 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3698 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3699 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3700 for (j=0;j<pcbddc->benign_n;j++) { 3701 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3702 for (i=0;i<pcbddc->local_primal_size;i++) { 3703 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3704 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3705 } 3706 } 3707 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3708 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3709 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3710 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3711 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3712 } 3713 #if 0 3714 { 3715 PetscViewer viewer; 3716 char filename[256]; 3717 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3718 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3719 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3720 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3721 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3722 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3723 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3724 if (save_change) { 3725 Mat phi_B; 3726 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3727 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3728 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3729 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3730 } else { 3731 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3732 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3733 } 3734 if (pcbddc->coarse_phi_D) { 3735 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3736 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3737 } 3738 if (pcbddc->coarse_psi_B) { 3739 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3740 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3741 } 3742 if (pcbddc->coarse_psi_D) { 3743 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3744 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3745 } 3746 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3747 } 3748 #endif 3749 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3750 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3751 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3752 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3753 3754 /* check constraints */ 3755 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3756 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3757 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3758 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3759 } else { 3760 PetscScalar *data; 3761 Mat tmat; 3762 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3764 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3765 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3766 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3767 } 3768 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3769 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3770 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3771 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3772 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3773 if (!pcbddc->symmetric_primal) { 3774 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3775 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3776 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3777 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3778 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3779 } 3780 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3781 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3782 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3783 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3784 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3785 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3786 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3787 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3788 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3789 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3790 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3791 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3792 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3793 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3794 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3795 if (!pcbddc->symmetric_primal) { 3796 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3797 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3798 } 3799 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3800 } 3801 /* get back data */ 3802 *coarse_submat_vals_n = coarse_submat_vals; 3803 PetscFunctionReturn(0); 3804 } 3805 3806 #undef __FUNCT__ 3807 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3808 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3809 { 3810 Mat *work_mat; 3811 IS isrow_s,iscol_s; 3812 PetscBool rsorted,csorted; 3813 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3814 PetscErrorCode ierr; 3815 3816 PetscFunctionBegin; 3817 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3818 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3819 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3820 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3821 3822 if (!rsorted) { 3823 const PetscInt *idxs; 3824 PetscInt *idxs_sorted,i; 3825 3826 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3827 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3828 for (i=0;i<rsize;i++) { 3829 idxs_perm_r[i] = i; 3830 } 3831 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3832 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3833 for (i=0;i<rsize;i++) { 3834 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3835 } 3836 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3837 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3838 } else { 3839 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3840 isrow_s = isrow; 3841 } 3842 3843 if (!csorted) { 3844 if (isrow == iscol) { 3845 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3846 iscol_s = isrow_s; 3847 } else { 3848 const PetscInt *idxs; 3849 PetscInt *idxs_sorted,i; 3850 3851 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3852 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3853 for (i=0;i<csize;i++) { 3854 idxs_perm_c[i] = i; 3855 } 3856 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3857 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3858 for (i=0;i<csize;i++) { 3859 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3860 } 3861 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3862 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3863 } 3864 } else { 3865 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3866 iscol_s = iscol; 3867 } 3868 3869 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3870 3871 if (!rsorted || !csorted) { 3872 Mat new_mat; 3873 IS is_perm_r,is_perm_c; 3874 3875 if (!rsorted) { 3876 PetscInt *idxs_r,i; 3877 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3878 for (i=0;i<rsize;i++) { 3879 idxs_r[idxs_perm_r[i]] = i; 3880 } 3881 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3882 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3883 } else { 3884 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3885 } 3886 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3887 3888 if (!csorted) { 3889 if (isrow_s == iscol_s) { 3890 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3891 is_perm_c = is_perm_r; 3892 } else { 3893 PetscInt *idxs_c,i; 3894 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3895 for (i=0;i<csize;i++) { 3896 idxs_c[idxs_perm_c[i]] = i; 3897 } 3898 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3899 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3900 } 3901 } else { 3902 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3903 } 3904 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3905 3906 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3907 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3908 work_mat[0] = new_mat; 3909 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3910 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3911 } 3912 3913 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3914 *B = work_mat[0]; 3915 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3916 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3917 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3918 PetscFunctionReturn(0); 3919 } 3920 3921 #undef __FUNCT__ 3922 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3923 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3924 { 3925 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3926 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3927 Mat new_mat; 3928 IS is_local,is_global; 3929 PetscInt local_size; 3930 PetscBool isseqaij; 3931 PetscErrorCode ierr; 3932 3933 PetscFunctionBegin; 3934 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3935 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3936 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3937 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3938 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3939 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3940 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3941 3942 /* check */ 3943 if (pcbddc->dbg_flag) { 3944 Vec x,x_change; 3945 PetscReal error; 3946 3947 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3948 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3949 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3950 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3951 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3952 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3953 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3954 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3955 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3956 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3957 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3958 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3959 ierr = VecDestroy(&x);CHKERRQ(ierr); 3960 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3961 } 3962 3963 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3964 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3965 if (isseqaij) { 3966 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3967 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3968 } else { 3969 Mat work_mat; 3970 3971 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3972 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3973 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3974 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3975 } 3976 if (matis->A->symmetric_set) { 3977 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3978 #if !defined(PETSC_USE_COMPLEX) 3979 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3980 #endif 3981 } 3982 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3983 PetscFunctionReturn(0); 3984 } 3985 3986 #undef __FUNCT__ 3987 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3988 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3989 { 3990 PC_IS* pcis = (PC_IS*)(pc->data); 3991 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3992 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3993 PetscInt *idx_R_local=NULL; 3994 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3995 PetscInt vbs,bs; 3996 PetscBT bitmask=NULL; 3997 PetscErrorCode ierr; 3998 3999 PetscFunctionBegin; 4000 /* 4001 No need to setup local scatters if 4002 - primal space is unchanged 4003 AND 4004 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4005 AND 4006 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4007 */ 4008 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4009 PetscFunctionReturn(0); 4010 } 4011 /* destroy old objects */ 4012 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4013 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4014 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4015 /* Set Non-overlapping dimensions */ 4016 n_B = pcis->n_B; 4017 n_D = pcis->n - n_B; 4018 n_vertices = pcbddc->n_vertices; 4019 4020 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4021 4022 /* create auxiliary bitmask and allocate workspace */ 4023 if (!sub_schurs || !sub_schurs->reuse_solver) { 4024 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4025 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4026 for (i=0;i<n_vertices;i++) { 4027 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4028 } 4029 4030 for (i=0, n_R=0; i<pcis->n; i++) { 4031 if (!PetscBTLookup(bitmask,i)) { 4032 idx_R_local[n_R++] = i; 4033 } 4034 } 4035 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4036 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4037 4038 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4039 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4040 } 4041 4042 /* Block code */ 4043 vbs = 1; 4044 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4045 if (bs>1 && !(n_vertices%bs)) { 4046 PetscBool is_blocked = PETSC_TRUE; 4047 PetscInt *vary; 4048 if (!sub_schurs || !sub_schurs->reuse_solver) { 4049 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4050 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4051 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4052 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4053 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4054 for (i=0; i<pcis->n/bs; i++) { 4055 if (vary[i]!=0 && vary[i]!=bs) { 4056 is_blocked = PETSC_FALSE; 4057 break; 4058 } 4059 } 4060 ierr = PetscFree(vary);CHKERRQ(ierr); 4061 } else { 4062 /* Verify directly the R set */ 4063 for (i=0; i<n_R/bs; i++) { 4064 PetscInt j,node=idx_R_local[bs*i]; 4065 for (j=1; j<bs; j++) { 4066 if (node != idx_R_local[bs*i+j]-j) { 4067 is_blocked = PETSC_FALSE; 4068 break; 4069 } 4070 } 4071 } 4072 } 4073 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4074 vbs = bs; 4075 for (i=0;i<n_R/vbs;i++) { 4076 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4077 } 4078 } 4079 } 4080 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4081 if (sub_schurs && sub_schurs->reuse_solver) { 4082 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4083 4084 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4085 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4086 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4087 reuse_solver->is_R = pcbddc->is_R_local; 4088 } else { 4089 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4090 } 4091 4092 /* print some info if requested */ 4093 if (pcbddc->dbg_flag) { 4094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4095 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4096 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4098 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4099 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4100 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4101 } 4102 4103 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4104 if (!sub_schurs || !sub_schurs->reuse_solver) { 4105 IS is_aux1,is_aux2; 4106 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4107 4108 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4109 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4110 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4111 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4112 for (i=0; i<n_D; i++) { 4113 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4114 } 4115 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4116 for (i=0, j=0; i<n_R; i++) { 4117 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4118 aux_array1[j++] = i; 4119 } 4120 } 4121 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4122 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4123 for (i=0, j=0; i<n_B; i++) { 4124 if (!PetscBTLookup(bitmask,is_indices[i])) { 4125 aux_array2[j++] = i; 4126 } 4127 } 4128 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4129 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4130 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4131 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4132 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4133 4134 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4135 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4136 for (i=0, j=0; i<n_R; i++) { 4137 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4138 aux_array1[j++] = i; 4139 } 4140 } 4141 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4142 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4143 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4144 } 4145 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4146 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4147 } else { 4148 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4149 IS tis; 4150 PetscInt schur_size; 4151 4152 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4153 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4154 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4155 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4156 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4157 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4158 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4159 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4160 } 4161 } 4162 PetscFunctionReturn(0); 4163 } 4164 4165 4166 #undef __FUNCT__ 4167 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4168 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4169 { 4170 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4171 PC_IS *pcis = (PC_IS*)pc->data; 4172 PC pc_temp; 4173 Mat A_RR; 4174 MatReuse reuse; 4175 PetscScalar m_one = -1.0; 4176 PetscReal value; 4177 PetscInt n_D,n_R; 4178 PetscBool check_corr[2],issbaij; 4179 PetscErrorCode ierr; 4180 /* prefixes stuff */ 4181 char dir_prefix[256],neu_prefix[256],str_level[16]; 4182 size_t len; 4183 4184 PetscFunctionBegin; 4185 4186 /* compute prefixes */ 4187 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4188 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4189 if (!pcbddc->current_level) { 4190 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4191 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4192 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4193 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4194 } else { 4195 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4196 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4197 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4198 len -= 15; /* remove "pc_bddc_coarse_" */ 4199 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4200 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4201 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4202 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4203 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4204 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4205 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4206 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4207 } 4208 4209 /* DIRICHLET PROBLEM */ 4210 if (dirichlet) { 4211 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4212 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4213 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4214 if (pcbddc->dbg_flag) { 4215 Mat A_IIn; 4216 4217 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4218 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4219 pcis->A_II = A_IIn; 4220 } 4221 } 4222 if (pcbddc->local_mat->symmetric_set) { 4223 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4224 } 4225 /* Matrix for Dirichlet problem is pcis->A_II */ 4226 n_D = pcis->n - pcis->n_B; 4227 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4228 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4229 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4230 /* default */ 4231 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4232 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4233 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4234 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4235 if (issbaij) { 4236 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4237 } else { 4238 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4239 } 4240 /* Allow user's customization */ 4241 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4242 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4243 } 4244 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4245 if (sub_schurs && sub_schurs->reuse_solver) { 4246 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4247 4248 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4249 } 4250 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4251 if (!n_D) { 4252 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4253 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4254 } 4255 /* Set Up KSP for Dirichlet problem of BDDC */ 4256 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4257 /* set ksp_D into pcis data */ 4258 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4259 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4260 pcis->ksp_D = pcbddc->ksp_D; 4261 } 4262 4263 /* NEUMANN PROBLEM */ 4264 A_RR = 0; 4265 if (neumann) { 4266 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4267 PetscInt ibs,mbs; 4268 PetscBool issbaij; 4269 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4270 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4271 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4272 if (pcbddc->ksp_R) { /* already created ksp */ 4273 PetscInt nn_R; 4274 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4275 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4276 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4277 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4278 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4279 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4280 reuse = MAT_INITIAL_MATRIX; 4281 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4282 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4283 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4284 reuse = MAT_INITIAL_MATRIX; 4285 } else { /* safe to reuse the matrix */ 4286 reuse = MAT_REUSE_MATRIX; 4287 } 4288 } 4289 /* last check */ 4290 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4291 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4292 reuse = MAT_INITIAL_MATRIX; 4293 } 4294 } else { /* first time, so we need to create the matrix */ 4295 reuse = MAT_INITIAL_MATRIX; 4296 } 4297 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4298 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4299 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4300 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4301 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4302 if (matis->A == pcbddc->local_mat) { 4303 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4304 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4305 } else { 4306 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4307 } 4308 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4309 if (matis->A == pcbddc->local_mat) { 4310 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4311 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4312 } else { 4313 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4314 } 4315 } 4316 /* extract A_RR */ 4317 if (sub_schurs && sub_schurs->reuse_solver) { 4318 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4319 4320 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4321 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4322 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4323 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4324 } else { 4325 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4326 } 4327 } else { 4328 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4329 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4330 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4331 } 4332 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4333 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4334 } 4335 if (pcbddc->local_mat->symmetric_set) { 4336 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4337 } 4338 if (!pcbddc->ksp_R) { /* create object if not present */ 4339 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4340 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4341 /* default */ 4342 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4343 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4344 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4345 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4346 if (issbaij) { 4347 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4348 } else { 4349 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4350 } 4351 /* Allow user's customization */ 4352 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4353 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4354 } 4355 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4356 if (!n_R) { 4357 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4358 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4359 } 4360 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4361 /* Reuse solver if it is present */ 4362 if (sub_schurs && sub_schurs->reuse_solver) { 4363 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4364 4365 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4366 } 4367 /* Set Up KSP for Neumann problem of BDDC */ 4368 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4369 } 4370 4371 if (pcbddc->dbg_flag) { 4372 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4373 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4374 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4375 } 4376 4377 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4378 check_corr[0] = check_corr[1] = PETSC_FALSE; 4379 if (pcbddc->NullSpace_corr[0]) { 4380 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4381 } 4382 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4383 check_corr[0] = PETSC_TRUE; 4384 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4385 } 4386 if (neumann && pcbddc->NullSpace_corr[2]) { 4387 check_corr[1] = PETSC_TRUE; 4388 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4389 } 4390 4391 /* check Dirichlet and Neumann solvers */ 4392 if (pcbddc->dbg_flag) { 4393 if (dirichlet) { /* Dirichlet */ 4394 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4395 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4396 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4397 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4398 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4399 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4400 if (check_corr[0]) { 4401 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4402 } 4403 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4404 } 4405 if (neumann) { /* Neumann */ 4406 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4407 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4408 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4409 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4410 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4412 if (check_corr[1]) { 4413 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4414 } 4415 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4416 } 4417 } 4418 /* free Neumann problem's matrix */ 4419 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4420 PetscFunctionReturn(0); 4421 } 4422 4423 #undef __FUNCT__ 4424 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4425 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4426 { 4427 PetscErrorCode ierr; 4428 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4429 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4430 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4431 4432 PetscFunctionBegin; 4433 if (!reuse_solver) { 4434 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4435 } 4436 if (!pcbddc->switch_static) { 4437 if (applytranspose && pcbddc->local_auxmat1) { 4438 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4439 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4440 } 4441 if (!reuse_solver) { 4442 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4443 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4444 } else { 4445 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4446 4447 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4448 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4449 } 4450 } else { 4451 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4452 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4453 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4454 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4455 if (applytranspose && pcbddc->local_auxmat1) { 4456 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4457 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4458 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4459 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4460 } 4461 } 4462 if (!reuse_solver || pcbddc->switch_static) { 4463 if (applytranspose) { 4464 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4465 } else { 4466 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4467 } 4468 } else { 4469 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4470 4471 if (applytranspose) { 4472 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4473 } else { 4474 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4475 } 4476 } 4477 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4478 if (!pcbddc->switch_static) { 4479 if (!reuse_solver) { 4480 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4481 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4482 } else { 4483 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4484 4485 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4486 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4487 } 4488 if (!applytranspose && pcbddc->local_auxmat1) { 4489 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4490 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4491 } 4492 } else { 4493 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4494 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4495 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4496 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4497 if (!applytranspose && pcbddc->local_auxmat1) { 4498 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4499 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4500 } 4501 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4502 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4503 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4504 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4505 } 4506 PetscFunctionReturn(0); 4507 } 4508 4509 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4510 #undef __FUNCT__ 4511 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4512 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4513 { 4514 PetscErrorCode ierr; 4515 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4516 PC_IS* pcis = (PC_IS*) (pc->data); 4517 const PetscScalar zero = 0.0; 4518 4519 PetscFunctionBegin; 4520 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4521 if (!pcbddc->benign_apply_coarse_only) { 4522 if (applytranspose) { 4523 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4524 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4525 } else { 4526 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4527 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4528 } 4529 } else { 4530 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4531 } 4532 4533 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4534 if (pcbddc->benign_n) { 4535 PetscScalar *array; 4536 PetscInt j; 4537 4538 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4539 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4540 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4541 } 4542 4543 /* start communications from local primal nodes to rhs of coarse solver */ 4544 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4545 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4546 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4547 4548 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4549 if (pcbddc->coarse_ksp) { 4550 Mat coarse_mat; 4551 Vec rhs,sol; 4552 MatNullSpace nullsp; 4553 PetscBool isbddc = PETSC_FALSE; 4554 4555 if (pcbddc->benign_have_null) { 4556 PC coarse_pc; 4557 4558 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4559 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4560 /* we need to propagate to coarser levels the need for a possible benign correction */ 4561 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4562 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4563 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4564 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4565 } 4566 } 4567 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4568 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4569 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4570 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4571 if (nullsp) { 4572 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4573 } 4574 if (applytranspose) { 4575 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4576 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4577 } else { 4578 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4579 PC coarse_pc; 4580 4581 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4582 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4583 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4584 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4585 } else { 4586 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4587 } 4588 } 4589 /* we don't need the benign correction at coarser levels anymore */ 4590 if (pcbddc->benign_have_null && isbddc) { 4591 PC coarse_pc; 4592 PC_BDDC* coarsepcbddc; 4593 4594 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4595 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4596 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4597 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4598 } 4599 if (nullsp) { 4600 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4601 } 4602 } 4603 4604 /* Local solution on R nodes */ 4605 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4606 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4607 } 4608 /* communications from coarse sol to local primal nodes */ 4609 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4610 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4611 4612 /* Sum contributions from the two levels */ 4613 if (!pcbddc->benign_apply_coarse_only) { 4614 if (applytranspose) { 4615 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4616 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4617 } else { 4618 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4619 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4620 } 4621 /* store p0 */ 4622 if (pcbddc->benign_n) { 4623 PetscScalar *array; 4624 PetscInt j; 4625 4626 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4627 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4628 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4629 } 4630 } else { /* expand the coarse solution */ 4631 if (applytranspose) { 4632 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4633 } else { 4634 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4635 } 4636 } 4637 PetscFunctionReturn(0); 4638 } 4639 4640 #undef __FUNCT__ 4641 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4642 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4643 { 4644 PetscErrorCode ierr; 4645 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4646 PetscScalar *array; 4647 Vec from,to; 4648 4649 PetscFunctionBegin; 4650 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4651 from = pcbddc->coarse_vec; 4652 to = pcbddc->vec1_P; 4653 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4654 Vec tvec; 4655 4656 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4657 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4658 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4659 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4660 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4661 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4662 } 4663 } else { /* from local to global -> put data in coarse right hand side */ 4664 from = pcbddc->vec1_P; 4665 to = pcbddc->coarse_vec; 4666 } 4667 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4668 PetscFunctionReturn(0); 4669 } 4670 4671 #undef __FUNCT__ 4672 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4673 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4674 { 4675 PetscErrorCode ierr; 4676 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4677 PetscScalar *array; 4678 Vec from,to; 4679 4680 PetscFunctionBegin; 4681 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4682 from = pcbddc->coarse_vec; 4683 to = pcbddc->vec1_P; 4684 } else { /* from local to global -> put data in coarse right hand side */ 4685 from = pcbddc->vec1_P; 4686 to = pcbddc->coarse_vec; 4687 } 4688 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4689 if (smode == SCATTER_FORWARD) { 4690 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4691 Vec tvec; 4692 4693 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4694 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4695 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4696 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4697 } 4698 } else { 4699 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4700 ierr = VecResetArray(from);CHKERRQ(ierr); 4701 } 4702 } 4703 PetscFunctionReturn(0); 4704 } 4705 4706 /* uncomment for testing purposes */ 4707 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4708 #undef __FUNCT__ 4709 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4710 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4711 { 4712 PetscErrorCode ierr; 4713 PC_IS* pcis = (PC_IS*)(pc->data); 4714 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4715 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4716 /* one and zero */ 4717 PetscScalar one=1.0,zero=0.0; 4718 /* space to store constraints and their local indices */ 4719 PetscScalar *constraints_data; 4720 PetscInt *constraints_idxs,*constraints_idxs_B; 4721 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4722 PetscInt *constraints_n; 4723 /* iterators */ 4724 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4725 /* BLAS integers */ 4726 PetscBLASInt lwork,lierr; 4727 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4728 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4729 /* reuse */ 4730 PetscInt olocal_primal_size,olocal_primal_size_cc; 4731 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4732 /* change of basis */ 4733 PetscBool qr_needed; 4734 PetscBT change_basis,qr_needed_idx; 4735 /* auxiliary stuff */ 4736 PetscInt *nnz,*is_indices; 4737 PetscInt ncc; 4738 /* some quantities */ 4739 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4740 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4741 4742 PetscFunctionBegin; 4743 /* Destroy Mat objects computed previously */ 4744 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4745 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4746 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4747 /* save info on constraints from previous setup (if any) */ 4748 olocal_primal_size = pcbddc->local_primal_size; 4749 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4750 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4751 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4752 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4753 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4754 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4755 4756 if (!pcbddc->adaptive_selection) { 4757 IS ISForVertices,*ISForFaces,*ISForEdges; 4758 MatNullSpace nearnullsp; 4759 const Vec *nearnullvecs; 4760 Vec *localnearnullsp; 4761 PetscScalar *array; 4762 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4763 PetscBool nnsp_has_cnst; 4764 /* LAPACK working arrays for SVD or POD */ 4765 PetscBool skip_lapack,boolforchange; 4766 PetscScalar *work; 4767 PetscReal *singular_vals; 4768 #if defined(PETSC_USE_COMPLEX) 4769 PetscReal *rwork; 4770 #endif 4771 #if defined(PETSC_MISSING_LAPACK_GESVD) 4772 PetscScalar *temp_basis,*correlation_mat; 4773 #else 4774 PetscBLASInt dummy_int=1; 4775 PetscScalar dummy_scalar=1.; 4776 #endif 4777 4778 /* Get index sets for faces, edges and vertices from graph */ 4779 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4780 /* print some info */ 4781 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4782 PetscInt nv; 4783 4784 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4785 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4786 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4787 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4790 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4791 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4792 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4793 } 4794 4795 /* free unneeded index sets */ 4796 if (!pcbddc->use_vertices) { 4797 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4798 } 4799 if (!pcbddc->use_edges) { 4800 for (i=0;i<n_ISForEdges;i++) { 4801 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4802 } 4803 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4804 n_ISForEdges = 0; 4805 } 4806 if (!pcbddc->use_faces) { 4807 for (i=0;i<n_ISForFaces;i++) { 4808 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4809 } 4810 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4811 n_ISForFaces = 0; 4812 } 4813 4814 /* check if near null space is attached to global mat */ 4815 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4816 if (nearnullsp) { 4817 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4818 /* remove any stored info */ 4819 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4820 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4821 /* store information for BDDC solver reuse */ 4822 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4823 pcbddc->onearnullspace = nearnullsp; 4824 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4825 for (i=0;i<nnsp_size;i++) { 4826 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4827 } 4828 } else { /* if near null space is not provided BDDC uses constants by default */ 4829 nnsp_size = 0; 4830 nnsp_has_cnst = PETSC_TRUE; 4831 } 4832 /* get max number of constraints on a single cc */ 4833 max_constraints = nnsp_size; 4834 if (nnsp_has_cnst) max_constraints++; 4835 4836 /* 4837 Evaluate maximum storage size needed by the procedure 4838 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4839 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4840 There can be multiple constraints per connected component 4841 */ 4842 n_vertices = 0; 4843 if (ISForVertices) { 4844 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4845 } 4846 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4847 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4848 4849 total_counts = n_ISForFaces+n_ISForEdges; 4850 total_counts *= max_constraints; 4851 total_counts += n_vertices; 4852 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4853 4854 total_counts = 0; 4855 max_size_of_constraint = 0; 4856 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4857 IS used_is; 4858 if (i<n_ISForEdges) { 4859 used_is = ISForEdges[i]; 4860 } else { 4861 used_is = ISForFaces[i-n_ISForEdges]; 4862 } 4863 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4864 total_counts += j; 4865 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4866 } 4867 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4868 4869 /* get local part of global near null space vectors */ 4870 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4871 for (k=0;k<nnsp_size;k++) { 4872 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4873 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4874 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4875 } 4876 4877 /* whether or not to skip lapack calls */ 4878 skip_lapack = PETSC_TRUE; 4879 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4880 4881 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4882 if (!skip_lapack) { 4883 PetscScalar temp_work; 4884 4885 #if defined(PETSC_MISSING_LAPACK_GESVD) 4886 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4887 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4888 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4889 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4890 #if defined(PETSC_USE_COMPLEX) 4891 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4892 #endif 4893 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4894 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4895 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4896 lwork = -1; 4897 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4898 #if !defined(PETSC_USE_COMPLEX) 4899 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4900 #else 4901 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4902 #endif 4903 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4904 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4905 #else /* on missing GESVD */ 4906 /* SVD */ 4907 PetscInt max_n,min_n; 4908 max_n = max_size_of_constraint; 4909 min_n = max_constraints; 4910 if (max_size_of_constraint < max_constraints) { 4911 min_n = max_size_of_constraint; 4912 max_n = max_constraints; 4913 } 4914 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4915 #if defined(PETSC_USE_COMPLEX) 4916 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4917 #endif 4918 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4919 lwork = -1; 4920 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4921 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4922 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4923 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4924 #if !defined(PETSC_USE_COMPLEX) 4925 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4926 #else 4927 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4928 #endif 4929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4930 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4931 #endif /* on missing GESVD */ 4932 /* Allocate optimal workspace */ 4933 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4934 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4935 } 4936 /* Now we can loop on constraining sets */ 4937 total_counts = 0; 4938 constraints_idxs_ptr[0] = 0; 4939 constraints_data_ptr[0] = 0; 4940 /* vertices */ 4941 if (n_vertices) { 4942 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4943 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4944 for (i=0;i<n_vertices;i++) { 4945 constraints_n[total_counts] = 1; 4946 constraints_data[total_counts] = 1.0; 4947 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4948 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4949 total_counts++; 4950 } 4951 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4952 n_vertices = total_counts; 4953 } 4954 4955 /* edges and faces */ 4956 total_counts_cc = total_counts; 4957 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4958 IS used_is; 4959 PetscBool idxs_copied = PETSC_FALSE; 4960 4961 if (ncc<n_ISForEdges) { 4962 used_is = ISForEdges[ncc]; 4963 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4964 } else { 4965 used_is = ISForFaces[ncc-n_ISForEdges]; 4966 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4967 } 4968 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4969 4970 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4971 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4972 /* change of basis should not be performed on local periodic nodes */ 4973 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4974 if (nnsp_has_cnst) { 4975 PetscScalar quad_value; 4976 4977 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4978 idxs_copied = PETSC_TRUE; 4979 4980 if (!pcbddc->use_nnsp_true) { 4981 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4982 } else { 4983 quad_value = 1.0; 4984 } 4985 for (j=0;j<size_of_constraint;j++) { 4986 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4987 } 4988 temp_constraints++; 4989 total_counts++; 4990 } 4991 for (k=0;k<nnsp_size;k++) { 4992 PetscReal real_value; 4993 PetscScalar *ptr_to_data; 4994 4995 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4996 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4997 for (j=0;j<size_of_constraint;j++) { 4998 ptr_to_data[j] = array[is_indices[j]]; 4999 } 5000 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5001 /* check if array is null on the connected component */ 5002 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5003 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5004 if (real_value > 0.0) { /* keep indices and values */ 5005 temp_constraints++; 5006 total_counts++; 5007 if (!idxs_copied) { 5008 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5009 idxs_copied = PETSC_TRUE; 5010 } 5011 } 5012 } 5013 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5014 valid_constraints = temp_constraints; 5015 if (!pcbddc->use_nnsp_true && temp_constraints) { 5016 if (temp_constraints == 1) { /* just normalize the constraint */ 5017 PetscScalar norm,*ptr_to_data; 5018 5019 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5020 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5021 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5022 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5023 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5024 } else { /* perform SVD */ 5025 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5026 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5027 5028 #if defined(PETSC_MISSING_LAPACK_GESVD) 5029 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5030 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5031 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5032 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5033 from that computed using LAPACKgesvd 5034 -> This is due to a different computation of eigenvectors in LAPACKheev 5035 -> The quality of the POD-computed basis will be the same */ 5036 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5037 /* Store upper triangular part of correlation matrix */ 5038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5039 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5040 for (j=0;j<temp_constraints;j++) { 5041 for (k=0;k<j+1;k++) { 5042 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5043 } 5044 } 5045 /* compute eigenvalues and eigenvectors of correlation matrix */ 5046 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5047 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5048 #if !defined(PETSC_USE_COMPLEX) 5049 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5050 #else 5051 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5052 #endif 5053 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5054 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5055 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5056 j = 0; 5057 while (j < temp_constraints && singular_vals[j] < tol) j++; 5058 total_counts = total_counts-j; 5059 valid_constraints = temp_constraints-j; 5060 /* scale and copy POD basis into used quadrature memory */ 5061 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5062 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5063 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5064 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5065 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5066 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5067 if (j<temp_constraints) { 5068 PetscInt ii; 5069 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5070 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5071 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5072 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5073 for (k=0;k<temp_constraints-j;k++) { 5074 for (ii=0;ii<size_of_constraint;ii++) { 5075 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5076 } 5077 } 5078 } 5079 #else /* on missing GESVD */ 5080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5081 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5082 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5083 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5084 #if !defined(PETSC_USE_COMPLEX) 5085 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5086 #else 5087 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5088 #endif 5089 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5090 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5091 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5092 k = temp_constraints; 5093 if (k > size_of_constraint) k = size_of_constraint; 5094 j = 0; 5095 while (j < k && singular_vals[k-j-1] < tol) j++; 5096 valid_constraints = k-j; 5097 total_counts = total_counts-temp_constraints+valid_constraints; 5098 #endif /* on missing GESVD */ 5099 } 5100 } 5101 /* update pointers information */ 5102 if (valid_constraints) { 5103 constraints_n[total_counts_cc] = valid_constraints; 5104 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5105 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5106 /* set change_of_basis flag */ 5107 if (boolforchange) { 5108 PetscBTSet(change_basis,total_counts_cc); 5109 } 5110 total_counts_cc++; 5111 } 5112 } 5113 /* free workspace */ 5114 if (!skip_lapack) { 5115 ierr = PetscFree(work);CHKERRQ(ierr); 5116 #if defined(PETSC_USE_COMPLEX) 5117 ierr = PetscFree(rwork);CHKERRQ(ierr); 5118 #endif 5119 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5120 #if defined(PETSC_MISSING_LAPACK_GESVD) 5121 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5122 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5123 #endif 5124 } 5125 for (k=0;k<nnsp_size;k++) { 5126 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5127 } 5128 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5129 /* free index sets of faces, edges and vertices */ 5130 for (i=0;i<n_ISForFaces;i++) { 5131 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5132 } 5133 if (n_ISForFaces) { 5134 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5135 } 5136 for (i=0;i<n_ISForEdges;i++) { 5137 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5138 } 5139 if (n_ISForEdges) { 5140 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5141 } 5142 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5143 } else { 5144 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5145 5146 total_counts = 0; 5147 n_vertices = 0; 5148 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5149 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5150 } 5151 max_constraints = 0; 5152 total_counts_cc = 0; 5153 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5154 total_counts += pcbddc->adaptive_constraints_n[i]; 5155 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5156 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5157 } 5158 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5159 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5160 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5161 constraints_data = pcbddc->adaptive_constraints_data; 5162 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5163 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5164 total_counts_cc = 0; 5165 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5166 if (pcbddc->adaptive_constraints_n[i]) { 5167 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5168 } 5169 } 5170 #if 0 5171 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5172 for (i=0;i<total_counts_cc;i++) { 5173 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5174 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5175 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5176 printf(" %d",constraints_idxs[j]); 5177 } 5178 printf("\n"); 5179 printf("number of cc: %d\n",constraints_n[i]); 5180 } 5181 for (i=0;i<n_vertices;i++) { 5182 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5183 } 5184 for (i=0;i<sub_schurs->n_subs;i++) { 5185 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5186 } 5187 #endif 5188 5189 max_size_of_constraint = 0; 5190 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5191 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5192 /* Change of basis */ 5193 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5194 if (pcbddc->use_change_of_basis) { 5195 for (i=0;i<sub_schurs->n_subs;i++) { 5196 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5197 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5198 } 5199 } 5200 } 5201 } 5202 pcbddc->local_primal_size = total_counts; 5203 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5204 5205 /* map constraints_idxs in boundary numbering */ 5206 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5207 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5208 5209 /* Create constraint matrix */ 5210 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5211 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5212 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5213 5214 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5215 /* determine if a QR strategy is needed for change of basis */ 5216 qr_needed = PETSC_FALSE; 5217 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5218 total_primal_vertices=0; 5219 pcbddc->local_primal_size_cc = 0; 5220 for (i=0;i<total_counts_cc;i++) { 5221 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5222 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5223 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5224 pcbddc->local_primal_size_cc += 1; 5225 } else if (PetscBTLookup(change_basis,i)) { 5226 for (k=0;k<constraints_n[i];k++) { 5227 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5228 } 5229 pcbddc->local_primal_size_cc += constraints_n[i]; 5230 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5231 PetscBTSet(qr_needed_idx,i); 5232 qr_needed = PETSC_TRUE; 5233 } 5234 } else { 5235 pcbddc->local_primal_size_cc += 1; 5236 } 5237 } 5238 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5239 pcbddc->n_vertices = total_primal_vertices; 5240 /* permute indices in order to have a sorted set of vertices */ 5241 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5242 5243 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5244 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5245 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5246 5247 /* nonzero structure of constraint matrix */ 5248 /* and get reference dof for local constraints */ 5249 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5250 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5251 5252 j = total_primal_vertices; 5253 total_counts = total_primal_vertices; 5254 cum = total_primal_vertices; 5255 for (i=n_vertices;i<total_counts_cc;i++) { 5256 if (!PetscBTLookup(change_basis,i)) { 5257 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5258 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5259 cum++; 5260 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5261 for (k=0;k<constraints_n[i];k++) { 5262 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5263 nnz[j+k] = size_of_constraint; 5264 } 5265 j += constraints_n[i]; 5266 } 5267 } 5268 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5269 ierr = PetscFree(nnz);CHKERRQ(ierr); 5270 5271 /* set values in constraint matrix */ 5272 for (i=0;i<total_primal_vertices;i++) { 5273 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5274 } 5275 total_counts = total_primal_vertices; 5276 for (i=n_vertices;i<total_counts_cc;i++) { 5277 if (!PetscBTLookup(change_basis,i)) { 5278 PetscInt *cols; 5279 5280 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5281 cols = constraints_idxs+constraints_idxs_ptr[i]; 5282 for (k=0;k<constraints_n[i];k++) { 5283 PetscInt row = total_counts+k; 5284 PetscScalar *vals; 5285 5286 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5287 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5288 } 5289 total_counts += constraints_n[i]; 5290 } 5291 } 5292 /* assembling */ 5293 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5294 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5295 5296 /* 5297 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5298 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5299 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5300 */ 5301 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5302 if (pcbddc->use_change_of_basis) { 5303 /* dual and primal dofs on a single cc */ 5304 PetscInt dual_dofs,primal_dofs; 5305 /* working stuff for GEQRF */ 5306 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5307 PetscBLASInt lqr_work; 5308 /* working stuff for UNGQR */ 5309 PetscScalar *gqr_work,lgqr_work_t; 5310 PetscBLASInt lgqr_work; 5311 /* working stuff for TRTRS */ 5312 PetscScalar *trs_rhs; 5313 PetscBLASInt Blas_NRHS; 5314 /* pointers for values insertion into change of basis matrix */ 5315 PetscInt *start_rows,*start_cols; 5316 PetscScalar *start_vals; 5317 /* working stuff for values insertion */ 5318 PetscBT is_primal; 5319 PetscInt *aux_primal_numbering_B; 5320 /* matrix sizes */ 5321 PetscInt global_size,local_size; 5322 /* temporary change of basis */ 5323 Mat localChangeOfBasisMatrix; 5324 /* extra space for debugging */ 5325 PetscScalar *dbg_work; 5326 5327 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5328 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5329 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5330 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5331 /* nonzeros for local mat */ 5332 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5333 if (!pcbddc->benign_change || pcbddc->fake_change) { 5334 for (i=0;i<pcis->n;i++) nnz[i]=1; 5335 } else { 5336 const PetscInt *ii; 5337 PetscInt n; 5338 PetscBool flg_row; 5339 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5340 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5341 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5342 } 5343 for (i=n_vertices;i<total_counts_cc;i++) { 5344 if (PetscBTLookup(change_basis,i)) { 5345 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5346 if (PetscBTLookup(qr_needed_idx,i)) { 5347 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5348 } else { 5349 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5350 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5351 } 5352 } 5353 } 5354 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5355 ierr = PetscFree(nnz);CHKERRQ(ierr); 5356 /* Set interior change in the matrix */ 5357 if (!pcbddc->benign_change || pcbddc->fake_change) { 5358 for (i=0;i<pcis->n;i++) { 5359 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5360 } 5361 } else { 5362 const PetscInt *ii,*jj; 5363 PetscScalar *aa; 5364 PetscInt n; 5365 PetscBool flg_row; 5366 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5367 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5368 for (i=0;i<n;i++) { 5369 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5370 } 5371 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5372 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5373 } 5374 5375 if (pcbddc->dbg_flag) { 5376 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5377 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5378 } 5379 5380 5381 /* Now we loop on the constraints which need a change of basis */ 5382 /* 5383 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5384 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5385 5386 Basic blocks of change of basis matrix T computed by 5387 5388 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5389 5390 | 1 0 ... 0 s_1/S | 5391 | 0 1 ... 0 s_2/S | 5392 | ... | 5393 | 0 ... 1 s_{n-1}/S | 5394 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5395 5396 with S = \sum_{i=1}^n s_i^2 5397 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5398 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5399 5400 - QR decomposition of constraints otherwise 5401 */ 5402 if (qr_needed) { 5403 /* space to store Q */ 5404 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5405 /* array to store scaling factors for reflectors */ 5406 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5407 /* first we issue queries for optimal work */ 5408 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5409 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5410 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5411 lqr_work = -1; 5412 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5413 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5414 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5415 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5416 lgqr_work = -1; 5417 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5418 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5419 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5420 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5421 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5422 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5423 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5424 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5425 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5426 /* array to store rhs and solution of triangular solver */ 5427 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5428 /* allocating workspace for check */ 5429 if (pcbddc->dbg_flag) { 5430 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5431 } 5432 } 5433 /* array to store whether a node is primal or not */ 5434 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5435 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5436 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5437 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5438 for (i=0;i<total_primal_vertices;i++) { 5439 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5440 } 5441 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5442 5443 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5444 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5445 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5446 if (PetscBTLookup(change_basis,total_counts)) { 5447 /* get constraint info */ 5448 primal_dofs = constraints_n[total_counts]; 5449 dual_dofs = size_of_constraint-primal_dofs; 5450 5451 if (pcbddc->dbg_flag) { 5452 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5453 } 5454 5455 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5456 5457 /* copy quadrature constraints for change of basis check */ 5458 if (pcbddc->dbg_flag) { 5459 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5460 } 5461 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5462 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5463 5464 /* compute QR decomposition of constraints */ 5465 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5466 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5467 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5468 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5469 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5470 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5471 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5472 5473 /* explictly compute R^-T */ 5474 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5475 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5476 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5477 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5478 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5479 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5480 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5481 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5482 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5483 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5484 5485 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5487 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5488 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5489 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5490 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5491 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5492 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5493 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5494 5495 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5496 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5497 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5499 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5500 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5501 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5502 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5503 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5504 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5505 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5506 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5507 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5508 5509 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5510 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5511 /* insert cols for primal dofs */ 5512 for (j=0;j<primal_dofs;j++) { 5513 start_vals = &qr_basis[j*size_of_constraint]; 5514 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5515 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5516 } 5517 /* insert cols for dual dofs */ 5518 for (j=0,k=0;j<dual_dofs;k++) { 5519 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5520 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5521 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5522 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5523 j++; 5524 } 5525 } 5526 5527 /* check change of basis */ 5528 if (pcbddc->dbg_flag) { 5529 PetscInt ii,jj; 5530 PetscBool valid_qr=PETSC_TRUE; 5531 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5535 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5536 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5538 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5539 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5540 for (jj=0;jj<size_of_constraint;jj++) { 5541 for (ii=0;ii<primal_dofs;ii++) { 5542 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5543 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5544 } 5545 } 5546 if (!valid_qr) { 5547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5548 for (jj=0;jj<size_of_constraint;jj++) { 5549 for (ii=0;ii<primal_dofs;ii++) { 5550 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5551 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5552 } 5553 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5554 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5555 } 5556 } 5557 } 5558 } else { 5559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5560 } 5561 } 5562 } else { /* simple transformation block */ 5563 PetscInt row,col; 5564 PetscScalar val,norm; 5565 5566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5567 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5568 for (j=0;j<size_of_constraint;j++) { 5569 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5570 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5571 if (!PetscBTLookup(is_primal,row_B)) { 5572 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5573 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5574 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5575 } else { 5576 for (k=0;k<size_of_constraint;k++) { 5577 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5578 if (row != col) { 5579 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5580 } else { 5581 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5582 } 5583 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5584 } 5585 } 5586 } 5587 if (pcbddc->dbg_flag) { 5588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5589 } 5590 } 5591 } else { 5592 if (pcbddc->dbg_flag) { 5593 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5594 } 5595 } 5596 } 5597 5598 /* free workspace */ 5599 if (qr_needed) { 5600 if (pcbddc->dbg_flag) { 5601 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5602 } 5603 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5604 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5605 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5606 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5607 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5608 } 5609 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5610 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5611 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5612 5613 /* assembling of global change of variable */ 5614 if (!pcbddc->fake_change) { 5615 Mat tmat; 5616 PetscInt bs; 5617 5618 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5619 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5620 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5621 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5622 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5623 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5624 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5625 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5626 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5627 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5628 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5629 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5630 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5631 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5632 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5633 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5634 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5635 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5636 5637 /* check */ 5638 if (pcbddc->dbg_flag) { 5639 PetscReal error; 5640 Vec x,x_change; 5641 5642 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5643 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5644 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5645 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5646 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5647 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5648 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5649 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5650 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5651 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5652 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5653 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5654 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5655 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5656 ierr = VecDestroy(&x);CHKERRQ(ierr); 5657 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5658 } 5659 /* adapt sub_schurs computed (if any) */ 5660 if (pcbddc->use_deluxe_scaling) { 5661 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5662 5663 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5664 if (sub_schurs && sub_schurs->S_Ej_all) { 5665 Mat S_new,tmat; 5666 IS is_all_N,is_V_Sall = NULL; 5667 5668 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5669 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5670 if (pcbddc->deluxe_zerorows) { 5671 ISLocalToGlobalMapping NtoSall; 5672 IS is_V; 5673 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5674 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5675 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5676 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5677 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5678 } 5679 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5680 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5681 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5682 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5683 if (pcbddc->deluxe_zerorows) { 5684 const PetscScalar *array; 5685 const PetscInt *idxs_V,*idxs_all; 5686 PetscInt i,n_V; 5687 5688 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5689 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5690 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5691 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5692 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5693 for (i=0;i<n_V;i++) { 5694 PetscScalar val; 5695 PetscInt idx; 5696 5697 idx = idxs_V[i]; 5698 val = array[idxs_all[idxs_V[i]]]; 5699 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5700 } 5701 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5702 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5703 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5704 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5705 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5706 } 5707 sub_schurs->S_Ej_all = S_new; 5708 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5709 if (sub_schurs->sum_S_Ej_all) { 5710 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5711 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5712 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5713 if (pcbddc->deluxe_zerorows) { 5714 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5715 } 5716 sub_schurs->sum_S_Ej_all = S_new; 5717 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5718 } 5719 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5720 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5721 } 5722 /* destroy any change of basis context in sub_schurs */ 5723 if (sub_schurs && sub_schurs->change) { 5724 PetscInt i; 5725 5726 for (i=0;i<sub_schurs->n_subs;i++) { 5727 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5728 } 5729 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5730 } 5731 } 5732 if (pcbddc->switch_static) { /* need to save the local change */ 5733 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5734 } else { 5735 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5736 } 5737 /* determine if any process has changed the pressures locally */ 5738 pcbddc->change_interior = pcbddc->benign_have_null; 5739 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5740 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5741 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5742 pcbddc->use_qr_single = qr_needed; 5743 } 5744 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5745 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5746 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5747 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5748 } else { 5749 Mat benign_global = NULL; 5750 if (pcbddc->benign_have_null) { 5751 Mat tmat; 5752 5753 pcbddc->change_interior = PETSC_TRUE; 5754 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5755 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5756 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5757 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5758 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5759 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5760 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5761 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5762 if (pcbddc->benign_change) { 5763 Mat M; 5764 5765 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5766 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5767 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5768 ierr = MatDestroy(&M);CHKERRQ(ierr); 5769 } else { 5770 Mat eye; 5771 PetscScalar *array; 5772 5773 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5774 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5775 for (i=0;i<pcis->n;i++) { 5776 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5777 } 5778 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5779 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5780 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5781 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5782 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5783 } 5784 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5785 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5786 } 5787 if (pcbddc->user_ChangeOfBasisMatrix) { 5788 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5789 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5790 } else if (pcbddc->benign_have_null) { 5791 pcbddc->ChangeOfBasisMatrix = benign_global; 5792 } 5793 } 5794 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5795 IS is_global; 5796 const PetscInt *gidxs; 5797 5798 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5799 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5800 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5801 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5802 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5803 } 5804 } 5805 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5806 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5807 } 5808 5809 if (!pcbddc->fake_change) { 5810 /* add pressure dofs to set of primal nodes for numbering purposes */ 5811 for (i=0;i<pcbddc->benign_n;i++) { 5812 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5813 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5814 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5815 pcbddc->local_primal_size_cc++; 5816 pcbddc->local_primal_size++; 5817 } 5818 5819 /* check if a new primal space has been introduced (also take into account benign trick) */ 5820 pcbddc->new_primal_space_local = PETSC_TRUE; 5821 if (olocal_primal_size == pcbddc->local_primal_size) { 5822 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5823 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5824 if (!pcbddc->new_primal_space_local) { 5825 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5826 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5827 } 5828 } 5829 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5830 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5831 } 5832 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5833 5834 /* flush dbg viewer */ 5835 if (pcbddc->dbg_flag) { 5836 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5837 } 5838 5839 /* free workspace */ 5840 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5841 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5842 if (!pcbddc->adaptive_selection) { 5843 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5844 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5845 } else { 5846 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5847 pcbddc->adaptive_constraints_idxs_ptr, 5848 pcbddc->adaptive_constraints_data_ptr, 5849 pcbddc->adaptive_constraints_idxs, 5850 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5851 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5852 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5853 } 5854 PetscFunctionReturn(0); 5855 } 5856 5857 #undef __FUNCT__ 5858 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5859 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5860 { 5861 ISLocalToGlobalMapping map; 5862 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5863 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5864 PetscInt ierr,i,N; 5865 5866 PetscFunctionBegin; 5867 if (pcbddc->recompute_topography) { 5868 /* Reset previously computed graph */ 5869 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5870 /* Init local Graph struct */ 5871 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5872 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5873 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5874 5875 /* Check validity of the csr graph passed in by the user */ 5876 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5877 5878 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5879 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5880 PetscInt *xadj,*adjncy; 5881 PetscInt nvtxs; 5882 PetscBool flg_row=PETSC_FALSE; 5883 5884 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5885 if (flg_row) { 5886 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5887 pcbddc->computed_rowadj = PETSC_TRUE; 5888 } 5889 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5890 } 5891 if (pcbddc->dbg_flag) { 5892 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5893 } 5894 5895 /* Setup of Graph */ 5896 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5897 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5898 5899 /* attach info on disconnected subdomains if present */ 5900 if (pcbddc->n_local_subs) { 5901 PetscInt *local_subs; 5902 5903 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5904 for (i=0;i<pcbddc->n_local_subs;i++) { 5905 const PetscInt *idxs; 5906 PetscInt nl,j; 5907 5908 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5909 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5910 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5911 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5912 } 5913 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5914 pcbddc->mat_graph->local_subs = local_subs; 5915 } 5916 pcbddc->graphanalyzed = PETSC_FALSE; 5917 } 5918 5919 if (pcbddc->graphanalyzed) { 5920 /* Graph's connected components analysis */ 5921 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5922 pcbddc->graphanalyzed = PETSC_TRUE; 5923 } 5924 PetscFunctionReturn(0); 5925 } 5926 5927 #undef __FUNCT__ 5928 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5929 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5930 { 5931 PetscInt i,j; 5932 PetscScalar *alphas; 5933 PetscErrorCode ierr; 5934 5935 PetscFunctionBegin; 5936 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5937 for (i=0;i<n;i++) { 5938 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5939 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5940 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5941 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5942 } 5943 ierr = PetscFree(alphas);CHKERRQ(ierr); 5944 PetscFunctionReturn(0); 5945 } 5946 5947 #undef __FUNCT__ 5948 #define __FUNCT__ "MatISGetSubassemblingPattern" 5949 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5950 { 5951 Mat A; 5952 PetscInt n_neighs,*neighs,*n_shared,**shared; 5953 PetscMPIInt size,rank,color; 5954 PetscInt *xadj,*adjncy; 5955 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5956 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5957 PetscInt void_procs,*procs_candidates = NULL; 5958 PetscInt xadj_count, *count; 5959 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5960 PetscSubcomm psubcomm; 5961 MPI_Comm subcomm; 5962 PetscErrorCode ierr; 5963 5964 PetscFunctionBegin; 5965 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5966 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5967 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5968 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5969 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5970 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5971 5972 if (have_void) *have_void = PETSC_FALSE; 5973 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5974 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5975 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5976 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5977 im_active = !!(n); 5978 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5979 void_procs = size - active_procs; 5980 /* get ranks of of non-active processes in mat communicator */ 5981 if (void_procs) { 5982 PetscInt ncand; 5983 5984 if (have_void) *have_void = PETSC_TRUE; 5985 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5986 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5987 for (i=0,ncand=0;i<size;i++) { 5988 if (!procs_candidates[i]) { 5989 procs_candidates[ncand++] = i; 5990 } 5991 } 5992 /* force n_subdomains to be not greater that the number of non-active processes */ 5993 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5994 } 5995 5996 /* number of subdomains requested greater than active processes -> just shift the matrix 5997 number of subdomains requested 1 -> send to master or first candidate in voids */ 5998 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5999 PetscInt issize,isidx,dest; 6000 if (*n_subdomains == 1) dest = 0; 6001 else dest = rank; 6002 if (im_active) { 6003 issize = 1; 6004 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6005 isidx = procs_candidates[dest]; 6006 } else { 6007 isidx = dest; 6008 } 6009 } else { 6010 issize = 0; 6011 isidx = -1; 6012 } 6013 *n_subdomains = active_procs; 6014 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6015 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6016 PetscFunctionReturn(0); 6017 } 6018 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6019 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6020 threshold = PetscMax(threshold,2); 6021 6022 /* Get info on mapping */ 6023 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6024 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6025 6026 /* build local CSR graph of subdomains' connectivity */ 6027 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6028 xadj[0] = 0; 6029 xadj[1] = PetscMax(n_neighs-1,0); 6030 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6031 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6032 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6033 for (i=1;i<n_neighs;i++) 6034 for (j=0;j<n_shared[i];j++) 6035 count[shared[i][j]] += 1; 6036 6037 xadj_count = 0; 6038 for (i=1;i<n_neighs;i++) { 6039 for (j=0;j<n_shared[i];j++) { 6040 if (count[shared[i][j]] < threshold) { 6041 adjncy[xadj_count] = neighs[i]; 6042 adjncy_wgt[xadj_count] = n_shared[i]; 6043 xadj_count++; 6044 break; 6045 } 6046 } 6047 } 6048 xadj[1] = xadj_count; 6049 ierr = PetscFree(count);CHKERRQ(ierr); 6050 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6051 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6052 6053 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6054 6055 /* Restrict work on active processes only */ 6056 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6057 if (void_procs) { 6058 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6059 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6060 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6061 subcomm = PetscSubcommChild(psubcomm); 6062 } else { 6063 psubcomm = NULL; 6064 subcomm = PetscObjectComm((PetscObject)mat); 6065 } 6066 6067 v_wgt = NULL; 6068 if (!color) { 6069 ierr = PetscFree(xadj);CHKERRQ(ierr); 6070 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6071 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6072 } else { 6073 Mat subdomain_adj; 6074 IS new_ranks,new_ranks_contig; 6075 MatPartitioning partitioner; 6076 PetscInt rstart=0,rend=0; 6077 PetscInt *is_indices,*oldranks; 6078 PetscMPIInt size; 6079 PetscBool aggregate; 6080 6081 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6082 if (void_procs) { 6083 PetscInt prank = rank; 6084 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6085 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6086 for (i=0;i<xadj[1];i++) { 6087 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6088 } 6089 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6090 } else { 6091 oldranks = NULL; 6092 } 6093 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6094 if (aggregate) { /* TODO: all this part could be made more efficient */ 6095 PetscInt lrows,row,ncols,*cols; 6096 PetscMPIInt nrank; 6097 PetscScalar *vals; 6098 6099 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6100 lrows = 0; 6101 if (nrank<redprocs) { 6102 lrows = size/redprocs; 6103 if (nrank<size%redprocs) lrows++; 6104 } 6105 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6106 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6107 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6108 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6109 row = nrank; 6110 ncols = xadj[1]-xadj[0]; 6111 cols = adjncy; 6112 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6113 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6114 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6115 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6116 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6117 ierr = PetscFree(xadj);CHKERRQ(ierr); 6118 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6119 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6120 ierr = PetscFree(vals);CHKERRQ(ierr); 6121 if (use_vwgt) { 6122 Vec v; 6123 const PetscScalar *array; 6124 PetscInt nl; 6125 6126 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6127 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6128 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6129 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6130 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6131 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6132 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6133 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6134 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6135 ierr = VecDestroy(&v);CHKERRQ(ierr); 6136 } 6137 } else { 6138 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6139 if (use_vwgt) { 6140 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6141 v_wgt[0] = local_size; 6142 } 6143 } 6144 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6145 6146 /* Partition */ 6147 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6148 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6149 if (v_wgt) { 6150 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6151 } 6152 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6153 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6154 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6155 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6156 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6157 6158 /* renumber new_ranks to avoid "holes" in new set of processors */ 6159 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6160 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6161 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6162 if (!aggregate) { 6163 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6164 #if defined(PETSC_USE_DEBUG) 6165 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6166 #endif 6167 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6168 } else if (oldranks) { 6169 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6170 } else { 6171 ranks_send_to_idx[0] = is_indices[0]; 6172 } 6173 } else { 6174 PetscInt idxs[1]; 6175 PetscMPIInt tag; 6176 MPI_Request *reqs; 6177 6178 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6179 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6180 for (i=rstart;i<rend;i++) { 6181 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6182 } 6183 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6184 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6185 ierr = PetscFree(reqs);CHKERRQ(ierr); 6186 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6187 #if defined(PETSC_USE_DEBUG) 6188 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6189 #endif 6190 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6191 } else if (oldranks) { 6192 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6193 } else { 6194 ranks_send_to_idx[0] = idxs[0]; 6195 } 6196 } 6197 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6198 /* clean up */ 6199 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6200 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6201 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6202 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6203 } 6204 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6205 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6206 6207 /* assemble parallel IS for sends */ 6208 i = 1; 6209 if (!color) i=0; 6210 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6211 PetscFunctionReturn(0); 6212 } 6213 6214 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6215 6216 #undef __FUNCT__ 6217 #define __FUNCT__ "MatISSubassemble" 6218 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6219 { 6220 Mat local_mat; 6221 IS is_sends_internal; 6222 PetscInt rows,cols,new_local_rows; 6223 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6224 PetscBool ismatis,isdense,newisdense,destroy_mat; 6225 ISLocalToGlobalMapping l2gmap; 6226 PetscInt* l2gmap_indices; 6227 const PetscInt* is_indices; 6228 MatType new_local_type; 6229 /* buffers */ 6230 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6231 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6232 PetscInt *recv_buffer_idxs_local; 6233 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6234 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6235 /* MPI */ 6236 MPI_Comm comm,comm_n; 6237 PetscSubcomm subcomm; 6238 PetscMPIInt n_sends,n_recvs,commsize; 6239 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6240 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6241 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6242 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6243 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6244 PetscErrorCode ierr; 6245 6246 PetscFunctionBegin; 6247 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6248 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6249 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6250 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6251 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6252 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6253 PetscValidLogicalCollectiveBool(mat,reuse,6); 6254 PetscValidLogicalCollectiveInt(mat,nis,8); 6255 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6256 if (nvecs) { 6257 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6258 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6259 } 6260 /* further checks */ 6261 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6262 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6263 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6264 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6265 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6266 if (reuse && *mat_n) { 6267 PetscInt mrows,mcols,mnrows,mncols; 6268 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6269 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6270 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6271 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6272 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6273 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6274 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6275 } 6276 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6277 PetscValidLogicalCollectiveInt(mat,bs,0); 6278 6279 /* prepare IS for sending if not provided */ 6280 if (!is_sends) { 6281 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6282 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6283 } else { 6284 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6285 is_sends_internal = is_sends; 6286 } 6287 6288 /* get comm */ 6289 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6290 6291 /* compute number of sends */ 6292 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6293 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6294 6295 /* compute number of receives */ 6296 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6297 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6298 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6299 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6300 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6301 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6302 ierr = PetscFree(iflags);CHKERRQ(ierr); 6303 6304 /* restrict comm if requested */ 6305 subcomm = 0; 6306 destroy_mat = PETSC_FALSE; 6307 if (restrict_comm) { 6308 PetscMPIInt color,subcommsize; 6309 6310 color = 0; 6311 if (restrict_full) { 6312 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6313 } else { 6314 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6315 } 6316 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6317 subcommsize = commsize - subcommsize; 6318 /* check if reuse has been requested */ 6319 if (reuse) { 6320 if (*mat_n) { 6321 PetscMPIInt subcommsize2; 6322 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6323 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6324 comm_n = PetscObjectComm((PetscObject)*mat_n); 6325 } else { 6326 comm_n = PETSC_COMM_SELF; 6327 } 6328 } else { /* MAT_INITIAL_MATRIX */ 6329 PetscMPIInt rank; 6330 6331 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6332 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6333 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6334 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6335 comm_n = PetscSubcommChild(subcomm); 6336 } 6337 /* flag to destroy *mat_n if not significative */ 6338 if (color) destroy_mat = PETSC_TRUE; 6339 } else { 6340 comm_n = comm; 6341 } 6342 6343 /* prepare send/receive buffers */ 6344 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6345 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6346 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6347 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6348 if (nis) { 6349 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6350 } 6351 6352 /* Get data from local matrices */ 6353 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6354 /* TODO: See below some guidelines on how to prepare the local buffers */ 6355 /* 6356 send_buffer_vals should contain the raw values of the local matrix 6357 send_buffer_idxs should contain: 6358 - MatType_PRIVATE type 6359 - PetscInt size_of_l2gmap 6360 - PetscInt global_row_indices[size_of_l2gmap] 6361 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6362 */ 6363 else { 6364 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6365 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6366 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6367 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6368 send_buffer_idxs[1] = i; 6369 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6370 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6371 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6372 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6373 for (i=0;i<n_sends;i++) { 6374 ilengths_vals[is_indices[i]] = len*len; 6375 ilengths_idxs[is_indices[i]] = len+2; 6376 } 6377 } 6378 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6379 /* additional is (if any) */ 6380 if (nis) { 6381 PetscMPIInt psum; 6382 PetscInt j; 6383 for (j=0,psum=0;j<nis;j++) { 6384 PetscInt plen; 6385 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6386 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6387 psum += len+1; /* indices + lenght */ 6388 } 6389 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6390 for (j=0,psum=0;j<nis;j++) { 6391 PetscInt plen; 6392 const PetscInt *is_array_idxs; 6393 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6394 send_buffer_idxs_is[psum] = plen; 6395 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6396 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6397 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6398 psum += plen+1; /* indices + lenght */ 6399 } 6400 for (i=0;i<n_sends;i++) { 6401 ilengths_idxs_is[is_indices[i]] = psum; 6402 } 6403 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6404 } 6405 6406 buf_size_idxs = 0; 6407 buf_size_vals = 0; 6408 buf_size_idxs_is = 0; 6409 buf_size_vecs = 0; 6410 for (i=0;i<n_recvs;i++) { 6411 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6412 buf_size_vals += (PetscInt)olengths_vals[i]; 6413 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6414 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6415 } 6416 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6417 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6418 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6419 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6420 6421 /* get new tags for clean communications */ 6422 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6423 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6424 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6425 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6426 6427 /* allocate for requests */ 6428 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6429 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6430 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6431 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6432 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6433 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6434 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6435 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6436 6437 /* communications */ 6438 ptr_idxs = recv_buffer_idxs; 6439 ptr_vals = recv_buffer_vals; 6440 ptr_idxs_is = recv_buffer_idxs_is; 6441 ptr_vecs = recv_buffer_vecs; 6442 for (i=0;i<n_recvs;i++) { 6443 source_dest = onodes[i]; 6444 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6445 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6446 ptr_idxs += olengths_idxs[i]; 6447 ptr_vals += olengths_vals[i]; 6448 if (nis) { 6449 source_dest = onodes_is[i]; 6450 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6451 ptr_idxs_is += olengths_idxs_is[i]; 6452 } 6453 if (nvecs) { 6454 source_dest = onodes[i]; 6455 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6456 ptr_vecs += olengths_idxs[i]-2; 6457 } 6458 } 6459 for (i=0;i<n_sends;i++) { 6460 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6461 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6462 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6463 if (nis) { 6464 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6465 } 6466 if (nvecs) { 6467 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6468 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6469 } 6470 } 6471 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6472 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6473 6474 /* assemble new l2g map */ 6475 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6476 ptr_idxs = recv_buffer_idxs; 6477 new_local_rows = 0; 6478 for (i=0;i<n_recvs;i++) { 6479 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6480 ptr_idxs += olengths_idxs[i]; 6481 } 6482 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6483 ptr_idxs = recv_buffer_idxs; 6484 new_local_rows = 0; 6485 for (i=0;i<n_recvs;i++) { 6486 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6487 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6488 ptr_idxs += olengths_idxs[i]; 6489 } 6490 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6491 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6492 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6493 6494 /* infer new local matrix type from received local matrices type */ 6495 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6496 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6497 if (n_recvs) { 6498 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6499 ptr_idxs = recv_buffer_idxs; 6500 for (i=0;i<n_recvs;i++) { 6501 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6502 new_local_type_private = MATAIJ_PRIVATE; 6503 break; 6504 } 6505 ptr_idxs += olengths_idxs[i]; 6506 } 6507 switch (new_local_type_private) { 6508 case MATDENSE_PRIVATE: 6509 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6510 new_local_type = MATSEQAIJ; 6511 bs = 1; 6512 } else { /* if I receive only 1 dense matrix */ 6513 new_local_type = MATSEQDENSE; 6514 bs = 1; 6515 } 6516 break; 6517 case MATAIJ_PRIVATE: 6518 new_local_type = MATSEQAIJ; 6519 bs = 1; 6520 break; 6521 case MATBAIJ_PRIVATE: 6522 new_local_type = MATSEQBAIJ; 6523 break; 6524 case MATSBAIJ_PRIVATE: 6525 new_local_type = MATSEQSBAIJ; 6526 break; 6527 default: 6528 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6529 break; 6530 } 6531 } else { /* by default, new_local_type is seqdense */ 6532 new_local_type = MATSEQDENSE; 6533 bs = 1; 6534 } 6535 6536 /* create MATIS object if needed */ 6537 if (!reuse) { 6538 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6539 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6540 } else { 6541 /* it also destroys the local matrices */ 6542 if (*mat_n) { 6543 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6544 } else { /* this is a fake object */ 6545 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6546 } 6547 } 6548 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6549 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6550 6551 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6552 6553 /* Global to local map of received indices */ 6554 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6555 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6556 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6557 6558 /* restore attributes -> type of incoming data and its size */ 6559 buf_size_idxs = 0; 6560 for (i=0;i<n_recvs;i++) { 6561 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6562 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6563 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6564 } 6565 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6566 6567 /* set preallocation */ 6568 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6569 if (!newisdense) { 6570 PetscInt *new_local_nnz=0; 6571 6572 ptr_idxs = recv_buffer_idxs_local; 6573 if (n_recvs) { 6574 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6575 } 6576 for (i=0;i<n_recvs;i++) { 6577 PetscInt j; 6578 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6579 for (j=0;j<*(ptr_idxs+1);j++) { 6580 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6581 } 6582 } else { 6583 /* TODO */ 6584 } 6585 ptr_idxs += olengths_idxs[i]; 6586 } 6587 if (new_local_nnz) { 6588 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6589 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6590 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6591 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6592 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6593 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6594 } else { 6595 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6596 } 6597 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6598 } else { 6599 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6600 } 6601 6602 /* set values */ 6603 ptr_vals = recv_buffer_vals; 6604 ptr_idxs = recv_buffer_idxs_local; 6605 for (i=0;i<n_recvs;i++) { 6606 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6607 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6608 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6609 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6610 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6611 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6612 } else { 6613 /* TODO */ 6614 } 6615 ptr_idxs += olengths_idxs[i]; 6616 ptr_vals += olengths_vals[i]; 6617 } 6618 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6619 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6620 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6621 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6622 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6623 6624 #if 0 6625 if (!restrict_comm) { /* check */ 6626 Vec lvec,rvec; 6627 PetscReal infty_error; 6628 6629 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6630 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6631 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6632 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6633 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6634 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6635 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6636 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6637 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6638 } 6639 #endif 6640 6641 /* assemble new additional is (if any) */ 6642 if (nis) { 6643 PetscInt **temp_idxs,*count_is,j,psum; 6644 6645 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6646 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6647 ptr_idxs = recv_buffer_idxs_is; 6648 psum = 0; 6649 for (i=0;i<n_recvs;i++) { 6650 for (j=0;j<nis;j++) { 6651 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6652 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6653 psum += plen; 6654 ptr_idxs += plen+1; /* shift pointer to received data */ 6655 } 6656 } 6657 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6658 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6659 for (i=1;i<nis;i++) { 6660 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6661 } 6662 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6663 ptr_idxs = recv_buffer_idxs_is; 6664 for (i=0;i<n_recvs;i++) { 6665 for (j=0;j<nis;j++) { 6666 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6667 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6668 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6669 ptr_idxs += plen+1; /* shift pointer to received data */ 6670 } 6671 } 6672 for (i=0;i<nis;i++) { 6673 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6674 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6675 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6676 } 6677 ierr = PetscFree(count_is);CHKERRQ(ierr); 6678 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6679 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6680 } 6681 /* free workspace */ 6682 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6683 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6684 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6685 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6686 if (isdense) { 6687 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6688 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6689 } else { 6690 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6691 } 6692 if (nis) { 6693 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6694 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6695 } 6696 6697 if (nvecs) { 6698 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6699 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6700 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6701 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6702 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6703 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6704 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6705 /* set values */ 6706 ptr_vals = recv_buffer_vecs; 6707 ptr_idxs = recv_buffer_idxs_local; 6708 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6709 for (i=0;i<n_recvs;i++) { 6710 PetscInt j; 6711 for (j=0;j<*(ptr_idxs+1);j++) { 6712 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6713 } 6714 ptr_idxs += olengths_idxs[i]; 6715 ptr_vals += olengths_idxs[i]-2; 6716 } 6717 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6718 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6719 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6720 } 6721 6722 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6723 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6724 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6725 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6726 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6727 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6728 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6729 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6730 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6731 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6732 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6733 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6734 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6735 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6736 ierr = PetscFree(onodes);CHKERRQ(ierr); 6737 if (nis) { 6738 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6739 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6740 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6741 } 6742 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6743 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6744 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6745 for (i=0;i<nis;i++) { 6746 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6747 } 6748 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6749 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6750 } 6751 *mat_n = NULL; 6752 } 6753 PetscFunctionReturn(0); 6754 } 6755 6756 /* temporary hack into ksp private data structure */ 6757 #include <petsc/private/kspimpl.h> 6758 6759 #undef __FUNCT__ 6760 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6761 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6762 { 6763 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6764 PC_IS *pcis = (PC_IS*)pc->data; 6765 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6766 Mat coarsedivudotp = NULL; 6767 MatNullSpace CoarseNullSpace = NULL; 6768 ISLocalToGlobalMapping coarse_islg; 6769 IS coarse_is,*isarray; 6770 PetscInt i,im_active=-1,active_procs=-1; 6771 PetscInt nis,nisdofs,nisneu,nisvert; 6772 PC pc_temp; 6773 PCType coarse_pc_type; 6774 KSPType coarse_ksp_type; 6775 PetscBool multilevel_requested,multilevel_allowed; 6776 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6777 Mat t_coarse_mat_is; 6778 PetscInt ncoarse; 6779 PetscBool compute_vecs = PETSC_FALSE; 6780 PetscScalar *array; 6781 MatReuse coarse_mat_reuse; 6782 PetscBool restr, full_restr, have_void; 6783 PetscErrorCode ierr; 6784 6785 PetscFunctionBegin; 6786 /* Assign global numbering to coarse dofs */ 6787 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6788 PetscInt ocoarse_size; 6789 compute_vecs = PETSC_TRUE; 6790 ocoarse_size = pcbddc->coarse_size; 6791 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6792 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6793 /* see if we can avoid some work */ 6794 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6795 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6796 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6797 PC pc; 6798 PetscBool isbddc; 6799 6800 /* temporary workaround since PCBDDC does not have a reset method so far */ 6801 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6802 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6803 if (isbddc) { 6804 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6805 } else { 6806 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6807 } 6808 coarse_reuse = PETSC_FALSE; 6809 } else { /* we can safely reuse already computed coarse matrix */ 6810 coarse_reuse = PETSC_TRUE; 6811 } 6812 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6813 coarse_reuse = PETSC_FALSE; 6814 } 6815 /* reset any subassembling information */ 6816 if (!coarse_reuse || pcbddc->recompute_topography) { 6817 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6818 } 6819 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6820 coarse_reuse = PETSC_TRUE; 6821 } 6822 /* assemble coarse matrix */ 6823 if (coarse_reuse && pcbddc->coarse_ksp) { 6824 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6825 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6826 coarse_mat_reuse = MAT_REUSE_MATRIX; 6827 } else { 6828 coarse_mat = NULL; 6829 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6830 } 6831 6832 /* creates temporary l2gmap and IS for coarse indexes */ 6833 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6834 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6835 6836 /* creates temporary MATIS object for coarse matrix */ 6837 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6838 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6839 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6840 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6841 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6842 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6843 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6844 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6845 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6846 6847 /* count "active" (i.e. with positive local size) and "void" processes */ 6848 im_active = !!(pcis->n); 6849 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6850 6851 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6852 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6853 /* full_restr : just use the receivers from the subassembling pattern */ 6854 coarse_mat_is = NULL; 6855 multilevel_allowed = PETSC_FALSE; 6856 multilevel_requested = PETSC_FALSE; 6857 full_restr = PETSC_TRUE; 6858 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6859 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6860 if (multilevel_requested) { 6861 ncoarse = active_procs/pcbddc->coarsening_ratio; 6862 restr = PETSC_FALSE; 6863 full_restr = PETSC_FALSE; 6864 } else { 6865 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6866 restr = PETSC_TRUE; 6867 full_restr = PETSC_TRUE; 6868 } 6869 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6870 ncoarse = PetscMax(1,ncoarse); 6871 if (!pcbddc->coarse_subassembling) { 6872 if (pcbddc->coarsening_ratio > 1) { 6873 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6874 } else { 6875 PetscMPIInt size,rank; 6876 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6877 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6878 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6879 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6880 } 6881 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6882 PetscInt psum; 6883 PetscMPIInt size; 6884 if (pcbddc->coarse_ksp) psum = 1; 6885 else psum = 0; 6886 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6887 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6888 if (ncoarse < size) have_void = PETSC_TRUE; 6889 } 6890 /* determine if we can go multilevel */ 6891 if (multilevel_requested) { 6892 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6893 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6894 } 6895 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6896 6897 /* dump subassembling pattern */ 6898 if (pcbddc->dbg_flag && multilevel_allowed) { 6899 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6900 } 6901 6902 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6903 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6904 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6905 const PetscInt *idxs; 6906 ISLocalToGlobalMapping tmap; 6907 6908 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6909 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6910 /* allocate space for temporary storage */ 6911 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6912 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6913 /* allocate for IS array */ 6914 nisdofs = pcbddc->n_ISForDofsLocal; 6915 nisneu = !!pcbddc->NeumannBoundariesLocal; 6916 nisvert = 0; /* nisvert is not used */ 6917 nis = nisdofs + nisneu + nisvert; 6918 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6919 /* dofs splitting */ 6920 for (i=0;i<nisdofs;i++) { 6921 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6922 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6923 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6924 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6925 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6926 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6927 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6928 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6929 } 6930 /* neumann boundaries */ 6931 if (pcbddc->NeumannBoundariesLocal) { 6932 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6933 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6934 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6935 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6936 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6937 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6938 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6939 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6940 } 6941 /* free memory */ 6942 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6943 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6944 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6945 } else { 6946 nis = 0; 6947 nisdofs = 0; 6948 nisneu = 0; 6949 nisvert = 0; 6950 isarray = NULL; 6951 } 6952 /* destroy no longer needed map */ 6953 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6954 6955 /* subassemble */ 6956 if (multilevel_allowed) { 6957 Vec vp[1]; 6958 PetscInt nvecs = 0; 6959 PetscBool reuse,reuser; 6960 6961 if (coarse_mat) reuse = PETSC_TRUE; 6962 else reuse = PETSC_FALSE; 6963 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6964 vp[0] = NULL; 6965 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6966 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6967 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6968 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6969 nvecs = 1; 6970 6971 if (pcbddc->divudotp) { 6972 Mat B,loc_divudotp; 6973 Vec v,p; 6974 IS dummy; 6975 PetscInt np; 6976 6977 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6978 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6979 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6980 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6981 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6982 ierr = VecSet(p,1.);CHKERRQ(ierr); 6983 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6984 ierr = VecDestroy(&p);CHKERRQ(ierr); 6985 ierr = MatDestroy(&B);CHKERRQ(ierr); 6986 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6987 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6988 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6989 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6990 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6991 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6992 ierr = VecDestroy(&v);CHKERRQ(ierr); 6993 } 6994 } 6995 if (reuser) { 6996 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6997 } else { 6998 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6999 } 7000 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7001 PetscScalar *arraym,*arrayv; 7002 PetscInt nl; 7003 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7004 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7005 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7006 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7007 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7008 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7009 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7010 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7011 } else { 7012 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7013 } 7014 } else { 7015 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 7016 } 7017 if (coarse_mat_is || coarse_mat) { 7018 PetscMPIInt size; 7019 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 7020 if (!multilevel_allowed) { 7021 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7022 } else { 7023 Mat A; 7024 7025 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7026 if (coarse_mat_is) { 7027 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7028 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7029 coarse_mat = coarse_mat_is; 7030 } 7031 /* be sure we don't have MatSeqDENSE as local mat */ 7032 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7033 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7034 } 7035 } 7036 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7037 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7038 7039 /* create local to global scatters for coarse problem */ 7040 if (compute_vecs) { 7041 PetscInt lrows; 7042 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7043 if (coarse_mat) { 7044 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7045 } else { 7046 lrows = 0; 7047 } 7048 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7049 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7050 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7051 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7052 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7053 } 7054 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7055 7056 /* set defaults for coarse KSP and PC */ 7057 if (multilevel_allowed) { 7058 coarse_ksp_type = KSPRICHARDSON; 7059 coarse_pc_type = PCBDDC; 7060 } else { 7061 coarse_ksp_type = KSPPREONLY; 7062 coarse_pc_type = PCREDUNDANT; 7063 } 7064 7065 /* print some info if requested */ 7066 if (pcbddc->dbg_flag) { 7067 if (!multilevel_allowed) { 7068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7069 if (multilevel_requested) { 7070 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7071 } else if (pcbddc->max_levels) { 7072 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7073 } 7074 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7075 } 7076 } 7077 7078 /* create the coarse KSP object only once with defaults */ 7079 if (coarse_mat) { 7080 PetscViewer dbg_viewer = NULL; 7081 if (pcbddc->dbg_flag) { 7082 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7083 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7084 } 7085 if (!pcbddc->coarse_ksp) { 7086 char prefix[256],str_level[16]; 7087 size_t len; 7088 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7089 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7090 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7091 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7092 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7093 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7094 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7095 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7096 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7097 /* prefix */ 7098 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7099 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7100 if (!pcbddc->current_level) { 7101 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7102 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7103 } else { 7104 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7105 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7106 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7107 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7108 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7109 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7110 } 7111 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7112 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7113 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7114 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7115 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7116 /* allow user customization */ 7117 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7118 } 7119 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7120 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7121 if (nisdofs) { 7122 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7123 for (i=0;i<nisdofs;i++) { 7124 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7125 } 7126 } 7127 if (nisneu) { 7128 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7129 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7130 } 7131 if (nisvert) { 7132 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7133 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7134 } 7135 7136 /* get some info after set from options */ 7137 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7138 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7139 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7140 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7141 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7142 isbddc = PETSC_FALSE; 7143 } 7144 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7145 if (isredundant) { 7146 KSP inner_ksp; 7147 PC inner_pc; 7148 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7149 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7150 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7151 } 7152 7153 /* parameters which miss an API */ 7154 if (isbddc) { 7155 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7156 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7157 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7158 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7159 if (pcbddc_coarse->benign_saddle_point) { 7160 Mat coarsedivudotp_is; 7161 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7162 IS row,col; 7163 const PetscInt *gidxs; 7164 PetscInt n,st,M,N; 7165 7166 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7167 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7168 st = st-n; 7169 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7170 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7171 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7172 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7173 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7174 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7175 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7176 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7177 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7178 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7179 ierr = ISDestroy(&row);CHKERRQ(ierr); 7180 ierr = ISDestroy(&col);CHKERRQ(ierr); 7181 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7182 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7183 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7184 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7185 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7186 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7187 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7188 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7189 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7190 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7191 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7192 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7193 } 7194 } 7195 7196 /* propagate symmetry info of coarse matrix */ 7197 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7198 if (pc->pmat->symmetric_set) { 7199 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7200 } 7201 if (pc->pmat->hermitian_set) { 7202 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7203 } 7204 if (pc->pmat->spd_set) { 7205 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7206 } 7207 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7208 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7209 } 7210 /* set operators */ 7211 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7212 if (pcbddc->dbg_flag) { 7213 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7214 } 7215 } 7216 ierr = PetscFree(isarray);CHKERRQ(ierr); 7217 #if 0 7218 { 7219 PetscViewer viewer; 7220 char filename[256]; 7221 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7222 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7223 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7224 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7225 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7226 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7227 } 7228 #endif 7229 7230 if (pcbddc->coarse_ksp) { 7231 Vec crhs,csol; 7232 7233 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7234 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7235 if (!csol) { 7236 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7237 } 7238 if (!crhs) { 7239 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7240 } 7241 } 7242 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7243 7244 /* compute null space for coarse solver if the benign trick has been requested */ 7245 if (pcbddc->benign_null) { 7246 7247 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7248 for (i=0;i<pcbddc->benign_n;i++) { 7249 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7250 } 7251 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7252 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7253 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7254 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7255 if (coarse_mat) { 7256 Vec nullv; 7257 PetscScalar *array,*array2; 7258 PetscInt nl; 7259 7260 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7261 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7262 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7263 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7264 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7265 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7266 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7267 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7268 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7269 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7270 } 7271 } 7272 7273 if (pcbddc->coarse_ksp) { 7274 PetscBool ispreonly; 7275 7276 if (CoarseNullSpace) { 7277 PetscBool isnull; 7278 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7279 if (isnull) { 7280 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7281 } 7282 /* TODO: add local nullspaces (if any) */ 7283 } 7284 /* setup coarse ksp */ 7285 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7286 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7287 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7288 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7289 KSP check_ksp; 7290 KSPType check_ksp_type; 7291 PC check_pc; 7292 Vec check_vec,coarse_vec; 7293 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7294 PetscInt its; 7295 PetscBool compute_eigs; 7296 PetscReal *eigs_r,*eigs_c; 7297 PetscInt neigs; 7298 const char *prefix; 7299 7300 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7301 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7302 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7303 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7304 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7305 /* prevent from setup unneeded object */ 7306 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7307 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7308 if (ispreonly) { 7309 check_ksp_type = KSPPREONLY; 7310 compute_eigs = PETSC_FALSE; 7311 } else { 7312 check_ksp_type = KSPGMRES; 7313 compute_eigs = PETSC_TRUE; 7314 } 7315 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7316 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7317 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7318 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7319 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7320 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7321 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7322 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7323 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7324 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7325 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7326 /* create random vec */ 7327 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7328 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7329 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7330 /* solve coarse problem */ 7331 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7332 /* set eigenvalue estimation if preonly has not been requested */ 7333 if (compute_eigs) { 7334 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7335 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7336 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7337 if (neigs) { 7338 lambda_max = eigs_r[neigs-1]; 7339 lambda_min = eigs_r[0]; 7340 if (pcbddc->use_coarse_estimates) { 7341 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7342 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7343 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7344 } 7345 } 7346 } 7347 } 7348 7349 /* check coarse problem residual error */ 7350 if (pcbddc->dbg_flag) { 7351 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7352 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7353 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7354 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7355 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7356 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7357 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7358 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7359 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7360 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7361 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7362 if (CoarseNullSpace) { 7363 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7364 } 7365 if (compute_eigs) { 7366 PetscReal lambda_max_s,lambda_min_s; 7367 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7368 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7369 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7370 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7371 for (i=0;i<neigs;i++) { 7372 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7373 } 7374 } 7375 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7376 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7377 } 7378 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7379 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7380 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7381 if (compute_eigs) { 7382 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7383 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7384 } 7385 } 7386 } 7387 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7388 /* print additional info */ 7389 if (pcbddc->dbg_flag) { 7390 /* waits until all processes reaches this point */ 7391 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7392 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7393 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7394 } 7395 7396 /* free memory */ 7397 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7398 PetscFunctionReturn(0); 7399 } 7400 7401 #undef __FUNCT__ 7402 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7403 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7404 { 7405 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7406 PC_IS* pcis = (PC_IS*)pc->data; 7407 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7408 IS subset,subset_mult,subset_n; 7409 PetscInt local_size,coarse_size=0; 7410 PetscInt *local_primal_indices=NULL; 7411 const PetscInt *t_local_primal_indices; 7412 PetscErrorCode ierr; 7413 7414 PetscFunctionBegin; 7415 /* Compute global number of coarse dofs */ 7416 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7417 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7418 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7419 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7420 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7421 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7422 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7423 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7424 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7425 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7426 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7427 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7428 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7429 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7430 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7431 7432 /* check numbering */ 7433 if (pcbddc->dbg_flag) { 7434 PetscScalar coarsesum,*array,*array2; 7435 PetscInt i; 7436 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7437 7438 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7439 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7440 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7441 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7442 /* counter */ 7443 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7444 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7445 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7446 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7447 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7448 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7449 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7450 for (i=0;i<pcbddc->local_primal_size;i++) { 7451 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7452 } 7453 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7454 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7455 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7456 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7457 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7458 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7459 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7460 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7461 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7462 for (i=0;i<pcis->n;i++) { 7463 if (array[i] != 0.0 && array[i] != array2[i]) { 7464 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7465 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7466 set_error = PETSC_TRUE; 7467 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7468 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7469 } 7470 } 7471 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7472 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7473 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7474 for (i=0;i<pcis->n;i++) { 7475 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7476 } 7477 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7478 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7479 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7480 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7481 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7482 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7483 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7484 PetscInt *gidxs; 7485 7486 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7487 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7489 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7490 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7491 for (i=0;i<pcbddc->local_primal_size;i++) { 7492 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7493 } 7494 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7495 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7496 } 7497 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7498 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7499 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7500 } 7501 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7502 /* get back data */ 7503 *coarse_size_n = coarse_size; 7504 *local_primal_indices_n = local_primal_indices; 7505 PetscFunctionReturn(0); 7506 } 7507 7508 #undef __FUNCT__ 7509 #define __FUNCT__ "PCBDDCGlobalToLocal" 7510 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7511 { 7512 IS localis_t; 7513 PetscInt i,lsize,*idxs,n; 7514 PetscScalar *vals; 7515 PetscErrorCode ierr; 7516 7517 PetscFunctionBegin; 7518 /* get indices in local ordering exploiting local to global map */ 7519 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7520 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7521 for (i=0;i<lsize;i++) vals[i] = 1.0; 7522 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7523 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7524 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7525 if (idxs) { /* multilevel guard */ 7526 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7527 } 7528 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7529 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7530 ierr = PetscFree(vals);CHKERRQ(ierr); 7531 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7532 /* now compute set in local ordering */ 7533 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7534 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7535 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7536 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7537 for (i=0,lsize=0;i<n;i++) { 7538 if (PetscRealPart(vals[i]) > 0.5) { 7539 lsize++; 7540 } 7541 } 7542 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7543 for (i=0,lsize=0;i<n;i++) { 7544 if (PetscRealPart(vals[i]) > 0.5) { 7545 idxs[lsize++] = i; 7546 } 7547 } 7548 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7549 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7550 *localis = localis_t; 7551 PetscFunctionReturn(0); 7552 } 7553 7554 #undef __FUNCT__ 7555 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7556 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7557 { 7558 PC_IS *pcis=(PC_IS*)pc->data; 7559 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7560 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7561 Mat S_j; 7562 PetscInt *used_xadj,*used_adjncy; 7563 PetscBool free_used_adj; 7564 PetscErrorCode ierr; 7565 7566 PetscFunctionBegin; 7567 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7568 free_used_adj = PETSC_FALSE; 7569 if (pcbddc->sub_schurs_layers == -1) { 7570 used_xadj = NULL; 7571 used_adjncy = NULL; 7572 } else { 7573 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7574 used_xadj = pcbddc->mat_graph->xadj; 7575 used_adjncy = pcbddc->mat_graph->adjncy; 7576 } else if (pcbddc->computed_rowadj) { 7577 used_xadj = pcbddc->mat_graph->xadj; 7578 used_adjncy = pcbddc->mat_graph->adjncy; 7579 } else { 7580 PetscBool flg_row=PETSC_FALSE; 7581 const PetscInt *xadj,*adjncy; 7582 PetscInt nvtxs; 7583 7584 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7585 if (flg_row) { 7586 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7587 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7588 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7589 free_used_adj = PETSC_TRUE; 7590 } else { 7591 pcbddc->sub_schurs_layers = -1; 7592 used_xadj = NULL; 7593 used_adjncy = NULL; 7594 } 7595 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7596 } 7597 } 7598 7599 /* setup sub_schurs data */ 7600 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7601 if (!sub_schurs->schur_explicit) { 7602 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7603 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7604 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7605 } else { 7606 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7607 PetscBool isseqaij,need_change = PETSC_FALSE; 7608 PetscInt benign_n; 7609 Mat change = NULL; 7610 Vec scaling = NULL; 7611 IS change_primal = NULL; 7612 7613 if (!pcbddc->use_vertices && reuse_solvers) { 7614 PetscInt n_vertices; 7615 7616 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7617 reuse_solvers = (PetscBool)!n_vertices; 7618 } 7619 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7620 if (!isseqaij) { 7621 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7622 if (matis->A == pcbddc->local_mat) { 7623 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7624 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7625 } else { 7626 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7627 } 7628 } 7629 if (!pcbddc->benign_change_explicit) { 7630 benign_n = pcbddc->benign_n; 7631 } else { 7632 benign_n = 0; 7633 } 7634 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7635 We need a global reduction to avoid possible deadlocks. 7636 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7637 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7638 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7639 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7640 need_change = (PetscBool)(!need_change); 7641 } 7642 /* If the user defines additional constraints, we import them here. 7643 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7644 if (need_change) { 7645 PC_IS *pcisf; 7646 PC_BDDC *pcbddcf; 7647 PC pcf; 7648 7649 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7650 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7651 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7652 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7653 /* hacks */ 7654 pcisf = (PC_IS*)pcf->data; 7655 pcisf->is_B_local = pcis->is_B_local; 7656 pcisf->vec1_N = pcis->vec1_N; 7657 pcisf->BtoNmap = pcis->BtoNmap; 7658 pcisf->n = pcis->n; 7659 pcisf->n_B = pcis->n_B; 7660 pcbddcf = (PC_BDDC*)pcf->data; 7661 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7662 pcbddcf->mat_graph = pcbddc->mat_graph; 7663 pcbddcf->use_faces = PETSC_TRUE; 7664 pcbddcf->use_change_of_basis = PETSC_TRUE; 7665 pcbddcf->use_change_on_faces = PETSC_TRUE; 7666 pcbddcf->use_qr_single = PETSC_TRUE; 7667 pcbddcf->fake_change = PETSC_TRUE; 7668 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7669 /* store information on primal vertices and change of basis (in local numbering) */ 7670 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7671 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7672 change = pcbddcf->ConstraintMatrix; 7673 pcbddcf->ConstraintMatrix = NULL; 7674 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7675 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7676 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7677 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7678 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7679 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7680 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7681 pcf->ops->destroy = NULL; 7682 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7683 } 7684 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7685 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7686 ierr = MatDestroy(&change);CHKERRQ(ierr); 7687 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7688 } 7689 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7690 7691 /* free adjacency */ 7692 if (free_used_adj) { 7693 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7694 } 7695 PetscFunctionReturn(0); 7696 } 7697 7698 #undef __FUNCT__ 7699 #define __FUNCT__ "PCBDDCInitSubSchurs" 7700 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7701 { 7702 PC_IS *pcis=(PC_IS*)pc->data; 7703 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7704 PCBDDCGraph graph; 7705 PetscErrorCode ierr; 7706 7707 PetscFunctionBegin; 7708 /* attach interface graph for determining subsets */ 7709 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7710 IS verticesIS,verticescomm; 7711 PetscInt vsize,*idxs; 7712 7713 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7714 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7715 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7716 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7717 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7718 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7719 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7720 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7721 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7722 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7723 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7724 } else { 7725 graph = pcbddc->mat_graph; 7726 } 7727 /* print some info */ 7728 if (pcbddc->dbg_flag) { 7729 IS vertices; 7730 PetscInt nv,nedges,nfaces; 7731 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7732 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7733 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7734 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7735 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7736 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7737 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7738 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7739 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7740 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7741 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7742 } 7743 7744 /* sub_schurs init */ 7745 if (!pcbddc->sub_schurs) { 7746 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7747 } 7748 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7749 7750 /* free graph struct */ 7751 if (pcbddc->sub_schurs_rebuild) { 7752 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7753 } 7754 PetscFunctionReturn(0); 7755 } 7756 7757 #undef __FUNCT__ 7758 #define __FUNCT__ "PCBDDCCheckOperator" 7759 PetscErrorCode PCBDDCCheckOperator(PC pc) 7760 { 7761 PC_IS *pcis=(PC_IS*)pc->data; 7762 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7763 PetscErrorCode ierr; 7764 7765 PetscFunctionBegin; 7766 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7767 IS zerodiag = NULL; 7768 Mat S_j,B0_B=NULL; 7769 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7770 PetscScalar *p0_check,*array,*array2; 7771 PetscReal norm; 7772 PetscInt i; 7773 7774 /* B0 and B0_B */ 7775 if (zerodiag) { 7776 IS dummy; 7777 7778 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7779 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7780 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7781 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7782 } 7783 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7784 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7785 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7786 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7787 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7788 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7789 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7790 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7791 /* S_j */ 7792 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7793 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7794 7795 /* mimic vector in \widetilde{W}_\Gamma */ 7796 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7797 /* continuous in primal space */ 7798 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7799 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7800 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7801 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7802 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7803 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7804 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7805 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7806 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7807 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7808 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7809 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7810 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7811 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7812 7813 /* assemble rhs for coarse problem */ 7814 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7815 /* local with Schur */ 7816 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7817 if (zerodiag) { 7818 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7819 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7820 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7821 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7822 } 7823 /* sum on primal nodes the local contributions */ 7824 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7825 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7826 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7827 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7828 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7829 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7830 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7831 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7832 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7833 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7834 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7835 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7836 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7837 /* scale primal nodes (BDDC sums contibutions) */ 7838 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7839 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7840 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7841 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7842 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7843 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7844 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7845 /* global: \widetilde{B0}_B w_\Gamma */ 7846 if (zerodiag) { 7847 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7848 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7849 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7850 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7851 } 7852 /* BDDC */ 7853 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7854 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7855 7856 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7857 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7858 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7859 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7860 for (i=0;i<pcbddc->benign_n;i++) { 7861 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7862 } 7863 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7864 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7865 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7866 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7867 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7868 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7869 } 7870 PetscFunctionReturn(0); 7871 } 7872