1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 if (!esize) PetscFunctionReturn(0); 84 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 85 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 86 87 /* gradients */ 88 ptr = work + 5*esize; 89 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 90 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 91 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 92 ierr = MatDestroy(&GE);CHKERRQ(ierr); 93 94 /* constants */ 95 ptr += rsize*csize; 96 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 97 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 99 ierr = MatDestroy(&GE);CHKERRQ(ierr); 100 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 101 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PCBDDCNedelecSupport" 107 PetscErrorCode PCBDDCNedelecSupport(PC pc) 108 { 109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 110 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 111 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 112 MatNullSpace nnsp; 113 Vec tvec,*quads; 114 PetscSF sfv; 115 ISLocalToGlobalMapping el2g,vl2g,fl2g; 116 MPI_Comm comm; 117 IS lned,primals,allprimals,nedfieldlocal; 118 IS *eedges,*extrows,*extcols,*alleedges; 119 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee; 120 PetscScalar *vals,*work; 121 PetscReal *rwork; 122 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 123 PetscInt ne,nv,Lv,order,n,field; 124 PetscInt n_neigh,*neigh,*n_shared,**shared; 125 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscBool print,eerr,done,lrc[2],conforming; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* test variable order code and print debug info TODO: to be removed */ 133 print = PETSC_FALSE; 134 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 135 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 136 137 /* Return to caller if there are no edges in the decomposition */ 138 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 139 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 140 ierr = ISLocalToGlobalMappingGetSize(el2g,&n);CHKERRQ(ierr); 141 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 142 lrc[0] = PETSC_FALSE; 143 for (i=0;i<n;i++) { 144 if (PetscRealPart(vals[i]) > 2.) { 145 lrc[0] = PETSC_TRUE; 146 break; 147 } 148 } 149 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 150 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 151 if (!lrc[1]) PetscFunctionReturn(0); 152 153 /* Get discrete gradient 154 If it is defined for a subset of dofs, assumes G is given in global ordering for all the dofs */ 155 G = pcbddc->discretegradient; 156 order = pcbddc->nedorder; 157 conforming = pcbddc->conforming; 158 field = pcbddc->nedfield; 159 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 160 if (pcbddc->n_ISForDofsLocal && field > -1) { 161 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 162 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 163 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 164 } else { 165 PetscBool testnedfield = PETSC_FALSE; 166 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 167 if (!testnedfield) { 168 ne = n; 169 nedfieldlocal = NULL; 170 } else { 171 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 172 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 173 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 174 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 175 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 176 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 177 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 178 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 179 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 180 for (i=0,cum=0;i<n;i++) { 181 if (matis->sf_leafdata[i] > 1) { 182 matis->sf_leafdata[cum++] = i; 183 } 184 } 185 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 186 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 187 } 188 } 189 190 if (nedfieldlocal) { /* merge with previous code when testing is done */ 191 IS is; 192 193 /* need to map from the local Nedelec field to local numbering */ 194 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 195 /* need to map from the local Nedelec field to global numbering */ 196 ierr = ISLocalToGlobalMappingApplyIS(el2g,nedfieldlocal,&is);CHKERRQ(ierr); 197 ierr = ISLocalToGlobalMappingCreateIS(is,&el2g);CHKERRQ(ierr); 198 ierr = ISDestroy(&is);CHKERRQ(ierr); 199 } else { 200 ierr = PetscObjectReference((PetscObject)el2g);CHKERRQ(ierr); 201 fl2g = NULL; 202 } 203 204 /* Sanity checks */ 205 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 206 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 207 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 208 209 /* Drop connections for interior edges (this modifies G) */ 210 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 211 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 212 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 213 if (nedfieldlocal) { 214 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 215 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 216 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 217 } else { 218 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 219 } 220 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 221 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 222 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 223 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 224 if (matis->sf_rootdata[i] < 2) { 225 matis->sf_rootdata[cum++] = i + rst; 226 } 227 } 228 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 229 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 230 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 231 232 /* Extract subdomain relevant rows of G */ 233 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 234 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 235 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 236 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 237 ierr = ISDestroy(&lned);CHKERRQ(ierr); 238 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 239 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 240 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 241 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 242 if (print) { 243 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 244 ierr = MatView(lG,NULL);CHKERRQ(ierr); 245 } 246 247 /* SF for nodal communications */ 248 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 249 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 250 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 251 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 252 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 253 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 254 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 257 258 /* Destroy temporary G created in MATIS format */ 259 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 260 261 /* Save lG */ 262 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 263 264 /* Analyze the edge-nodes connections (duplicate lG) */ 265 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 266 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 267 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 268 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 269 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 270 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 271 /* need to import the boundary specification to ensure the 272 proper detection of coarse edges' endpoints */ 273 if (pcbddc->DirichletBoundariesLocal) { 274 IS is; 275 276 if (fl2g) { 277 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 278 } else { 279 is = pcbddc->DirichletBoundariesLocal; 280 } 281 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 282 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 283 for (i=0;i<cum;i++) { 284 if (idxs[i] >= 0) { 285 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 286 } 287 } 288 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 289 if (fl2g) { 290 ierr = ISDestroy(&is);CHKERRQ(ierr); 291 } 292 } 293 if (pcbddc->NeumannBoundariesLocal) { 294 IS is; 295 296 if (fl2g) { 297 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 298 } else { 299 is = pcbddc->NeumannBoundariesLocal; 300 } 301 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 302 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 303 for (i=0;i<cum;i++) { 304 if (idxs[i] >= 0) { 305 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 306 } 307 } 308 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 309 if (fl2g) { 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } 312 } 313 314 /* need to remove coarse faces' dofs to ensure the 315 proper detection of coarse edges' endpoints */ 316 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 317 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 318 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 319 for (i=1;i<n_neigh;i++) 320 for (j=0;j<n_shared[i];j++) 321 marks[shared[i][j]]++; 322 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) { 324 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 325 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 326 } 327 } 328 329 if (!conforming) { 330 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 331 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 332 } 333 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 334 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 335 cum = 0; 336 for (i=0;i<ne;i++) { 337 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 338 if (!PetscBTLookup(btee,i)) { 339 marks[cum++] = i; 340 continue; 341 } 342 /* set badly connected edge dofs as primal */ 343 if (!conforming) { 344 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 345 marks[cum++] = i; 346 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 347 for (j=ii[i];j<ii[i+1];j++) { 348 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 349 } 350 } else { 351 /* every edge dofs should be connected trough a certain number of nodal dofs 352 to other edge dofs belonging to coarse edges 353 - at most 2 endpoints 354 - order-1 interior nodal dofs 355 - no undefined nodal dofs (nconn < order) 356 */ 357 PetscInt ends = 0,ints = 0, undef = 0; 358 for (j=ii[i];j<ii[i+1];j++) { 359 PetscInt v = jj[j],k; 360 PetscInt nconn = iit[v+1]-iit[v]; 361 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 362 if (nconn > order) ends++; 363 else if (nconn == order) ints++; 364 else undef++; 365 } 366 if (undef || ends > 2 || ints != order -1) { 367 marks[cum++] = i; 368 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 369 for (j=ii[i];j<ii[i+1];j++) { 370 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 371 } 372 } 373 } 374 } 375 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 376 if (!order && ii[i+1] != ii[i]) { 377 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 378 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 379 } 380 } 381 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 382 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 383 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 384 if (!conforming) { 385 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 386 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 387 } 388 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 389 /* identify splitpoints and corner candidates: TODO variable order */ 390 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 391 if (print) { 392 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 393 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 394 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 395 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 396 } 397 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 398 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 399 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 400 for (i=0;i<nv;i++) { 401 PetscInt ord = order, test = ii[i+1]-ii[i]; 402 if (!order) { 403 PetscReal vorder = 0.; 404 405 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 406 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 407 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 408 ord = 1; 409 } 410 #if defined(PETSC_USE_DEBUG) 411 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 412 #endif 413 if (test >= 3*ord) { /* splitpoints */ 414 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 415 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 416 } else if (test == ord) { 417 if (order == 1) { 418 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 419 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 420 } else { 421 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 422 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 423 } 424 } 425 } 426 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 427 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 428 429 /* Get the local G^T explicitly */ 430 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 431 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 432 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 433 434 /* Mark interior nodal dofs */ 435 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 436 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 437 for (i=1;i<n_neigh;i++) { 438 for (j=0;j<n_shared[i];j++) { 439 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 440 } 441 } 442 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 443 444 /* communicate corners and splitpoints */ 445 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 446 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 447 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 448 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 449 450 if (print) { 451 IS tbz; 452 453 cum = 0; 454 for (i=0;i<nv;i++) 455 if (sfvleaves[i]) 456 vmarks[cum++] = i; 457 458 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 459 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 460 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 461 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 462 } 463 464 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 465 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 466 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 467 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 468 469 /* Zero rows of lGt corresponding to identified corners 470 and interior nodal dofs */ 471 cum = 0; 472 for (i=0;i<nv;i++) { 473 if (sfvleaves[i]) { 474 vmarks[cum++] = i; 475 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 476 } 477 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 478 } 479 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 480 if (print) { 481 IS tbz; 482 483 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 484 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 485 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 486 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 487 } 488 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 489 ierr = PetscFree(vmarks);CHKERRQ(ierr); 490 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 491 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 492 493 /* Recompute G */ 494 ierr = MatDestroy(&lG);CHKERRQ(ierr); 495 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 496 if (print) { 497 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 498 ierr = MatView(lG,NULL);CHKERRQ(ierr); 499 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 500 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 501 } 502 503 /* Get primal dofs (if any) */ 504 cum = 0; 505 for (i=0;i<ne;i++) { 506 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 507 } 508 if (fl2g) { 509 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 510 } 511 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 512 if (print) { 513 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 514 ierr = ISView(primals,NULL);CHKERRQ(ierr); 515 } 516 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 517 /* TODO: what if the user passed in some of them ? */ 518 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 519 ierr = ISDestroy(&primals);CHKERRQ(ierr); 520 521 /* Compute edge connectivity */ 522 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 523 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 524 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 525 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 526 if (fl2g) { 527 PetscBT btf; 528 PetscInt *iia,*jja,*iiu,*jju; 529 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 530 531 /* create CSR for all local dofs */ 532 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 533 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 534 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 535 iiu = pcbddc->mat_graph->xadj; 536 jju = pcbddc->mat_graph->adjncy; 537 } else if (pcbddc->use_local_adj) { 538 rest = PETSC_TRUE; 539 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 540 } else { 541 free = PETSC_TRUE; 542 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 543 iiu[0] = 0; 544 for (i=0;i<n;i++) { 545 iiu[i+1] = i+1; 546 jju[i] = -1; 547 } 548 } 549 550 /* import sizes of CSR */ 551 iia[0] = 0; 552 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 553 554 /* overwrite entries corresponding to the Nedelec field */ 555 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 556 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 557 for (i=0;i<ne;i++) { 558 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 559 iia[idxs[i]+1] = ii[i+1]-ii[i]; 560 } 561 562 /* iia in CSR */ 563 for (i=0;i<n;i++) iia[i+1] += iia[i]; 564 565 /* jja in CSR */ 566 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 567 for (i=0;i<n;i++) 568 if (!PetscBTLookup(btf,i)) 569 for (j=0;j<iiu[i+1]-iiu[i];j++) 570 jja[iia[i]+j] = jju[iiu[i]+j]; 571 572 /* map edge dofs connectivity */ 573 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 574 for (i=0;i<ne;i++) { 575 PetscInt e = idxs[i]; 576 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 577 } 578 579 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 580 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 581 if (rest) { 582 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 583 } 584 if (free) { 585 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 586 } 587 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 588 } else { 589 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 590 } 591 } 592 593 /* Analyze interface for edge dofs */ 594 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 595 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 596 597 /* Get coarse edges in the edge space */ 598 pcbddc->mat_graph->twodim = PETSC_FALSE; 599 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 600 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 601 602 if (fl2g) { 603 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 604 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 605 for (i=0;i<nee;i++) { 606 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 607 } 608 } else { 609 eedges = alleedges; 610 primals = allprimals; 611 } 612 613 /* Mark fine edge dofs with their coarse edge id */ 614 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 615 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 616 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 617 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 618 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 619 if (print) { 620 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 621 ierr = ISView(primals,NULL);CHKERRQ(ierr); 622 } 623 624 maxsize = 0; 625 for (i=0;i<nee;i++) { 626 PetscInt size,mark = i+1; 627 628 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 629 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 630 for (j=0;j<size;j++) marks[idxs[j]] = mark; 631 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 632 maxsize = PetscMax(maxsize,size); 633 } 634 635 /* Find coarse edge endpoints */ 636 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 637 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 638 for (i=0;i<nee;i++) { 639 PetscInt mark = i+1,size; 640 641 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 642 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 643 if (print) { 644 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 645 ISView(eedges[i],NULL); 646 } 647 for (j=0;j<size;j++) { 648 PetscInt k, ee = idxs[j]; 649 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 650 for (k=ii[ee];k<ii[ee+1];k++) { 651 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 652 if (PetscBTLookup(btv,jj[k])) { 653 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 654 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 655 PetscInt k2; 656 PetscBool corner = PETSC_FALSE; 657 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 658 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 659 /* it's a corner if either is connected with an edge dof belonging to a different cc or 660 if the edge dof lie on the natural part of the boundary */ 661 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 662 corner = PETSC_TRUE; 663 break; 664 } 665 } 666 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 667 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 668 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 669 } else { 670 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 671 } 672 } 673 } 674 } 675 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 676 } 677 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 678 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 679 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 680 681 /* Reset marked primal dofs */ 682 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 683 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 684 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 685 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 686 687 /* Now use the initial lG */ 688 ierr = MatDestroy(&lG);CHKERRQ(ierr); 689 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 690 lG = lGinit; 691 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 692 693 /* Compute extended cols indices */ 694 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 695 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 696 i *= maxsize; 697 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 698 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 699 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 700 eerr = PETSC_FALSE; 701 for (i=0;i<nee;i++) { 702 PetscInt size; 703 704 cum = 0; 705 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 706 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 707 for (j=0;j<size;j++) { 708 PetscInt k,ee = idxs[j]; 709 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 710 } 711 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 712 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 713 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 714 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 715 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 716 /* it may happen that endpoints are not defined at this point 717 if it is the case, mark this edge for a second pass */ 718 if (cum != size -1) { 719 emarks[i] = 1; 720 if (print) { 721 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 722 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 723 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 724 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 725 } 726 eerr = PETSC_TRUE; 727 } 728 } 729 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 730 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 731 if (done) { 732 PetscInt *newprimals; 733 734 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 735 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 736 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 737 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 738 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 739 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 740 for (i=0;i<nee;i++) { 741 if (emarks[i]) { 742 PetscInt size,mark = i+1; 743 744 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 745 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 746 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 747 for (j=0;j<size;j++) { 748 PetscInt k,ee = idxs[j]; 749 for (k=ii[ee];k<ii[ee+1];k++) { 750 /* set all candidates located on the edge as corners */ 751 if (PetscBTLookup(btvcand,jj[k])) { 752 PetscInt k2,vv = jj[k]; 753 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 754 /* set all edge dofs connected to candidate as primals */ 755 for (k2=iit[vv];k2<iit[vv+1];k2++) { 756 if (marks[jjt[k2]] == mark) { 757 PetscInt k3,ee2 = jjt[k2]; 758 newprimals[cum++] = ee2; 759 /* finally set the new corners */ 760 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 761 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 762 } 763 } 764 } 765 } 766 } 767 } 768 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 769 } 770 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 771 } 772 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 773 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 774 if (fl2g) { 775 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 776 ierr = ISDestroy(&primals);CHKERRQ(ierr); 777 for (i=0;i<nee;i++) { 778 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 779 } 780 ierr = PetscFree(eedges);CHKERRQ(ierr); 781 } 782 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 783 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 784 ierr = PetscFree(newprimals);CHKERRQ(ierr); 785 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 786 ierr = ISDestroy(&primals);CHKERRQ(ierr); 787 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 788 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 789 if (fl2g) { 790 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 791 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 792 for (i=0;i<nee;i++) { 793 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 794 } 795 } else { 796 eedges = alleedges; 797 primals = allprimals; 798 } 799 800 /* Mark again */ 801 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 802 for (i=0;i<nee;i++) { 803 PetscInt size,mark = i+1; 804 805 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 806 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 807 for (j=0;j<size;j++) marks[idxs[j]] = mark; 808 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 809 } 810 if (print) { 811 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 812 ierr = ISView(primals,NULL);CHKERRQ(ierr); 813 } 814 815 /* Recompute extended cols */ 816 eerr = PETSC_FALSE; 817 for (i=0;i<nee;i++) { 818 PetscInt size; 819 820 cum = 0; 821 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 822 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 823 for (j=0;j<size;j++) { 824 PetscInt k,ee = idxs[j]; 825 for (k=ii[ee];k<ii[ee+1];k++) { 826 if (!PetscBTLookup(btv,jj[k])) { 827 extrow[cum++] = jj[k]; 828 } 829 } 830 } 831 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 832 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 833 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 834 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 835 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 836 if (cum != size -1) { 837 if (print) { 838 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 839 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 840 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 841 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 842 } 843 eerr = PETSC_TRUE; 844 } 845 } 846 } 847 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 848 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 849 ierr = PetscFree(emarks);CHKERRQ(ierr); 850 /* an error should not occur at this point */ 851 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 852 853 /* Check the number of endpoints */ 854 /* TODO: fix case for circular edge */ 855 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 856 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 857 for (i=0;i<nee;i++) { 858 PetscInt size, found = 0; 859 860 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 861 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 862 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 863 for (j=0;j<size;j++) { 864 PetscInt k,ee = idxs[j]; 865 for (k=ii[ee];k<ii[ee+1];k++) { 866 PetscInt vv = jj[k]; 867 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 868 found++; 869 } 870 } 871 } 872 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 873 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 874 } 875 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 876 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 877 878 #if defined(PETSC_USE_DEBUG) 879 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 880 not interfere with neighbouring coarse edges */ 881 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 883 for (i=0;i<nv;i++) { 884 PetscInt emax = 0,eemax = 0; 885 886 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 887 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 888 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 889 for (j=1;j<nee+1;j++) { 890 if (emax < emarks[j]) { 891 emax = emarks[j]; 892 eemax = j; 893 } 894 } 895 /* not relevant for edges */ 896 if (!eemax) continue; 897 898 for (j=ii[i];j<ii[i+1];j++) { 899 if (marks[jj[j]] && marks[jj[j]] != eemax) { 900 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 901 } 902 } 903 } 904 ierr = PetscFree(emarks);CHKERRQ(ierr); 905 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 906 #endif 907 908 /* Compute extended rows indices for edge blocks of the change of basis */ 909 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 910 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 911 extmem *= maxsize; 912 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 913 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 914 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 915 for (i=0;i<nv;i++) { 916 PetscInt mark = 0,size,start; 917 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 918 for (j=ii[i];j<ii[i+1];j++) 919 if (marks[jj[j]] && !mark) 920 mark = marks[jj[j]]; 921 922 /* not relevant */ 923 if (!mark) continue; 924 925 /* import extended row */ 926 mark--; 927 start = mark*extmem+extrowcum[mark]; 928 size = ii[i+1]-ii[i]; 929 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 930 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 931 extrowcum[mark] += size; 932 } 933 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 934 cum = 0; 935 for (i=0;i<nee;i++) { 936 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 937 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 938 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 939 cum = PetscMax(cum,size); 940 } 941 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 942 ierr = PetscFree(marks);CHKERRQ(ierr); 943 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 944 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 945 946 /* Workspace for lapack inner calls and VecSetValues */ 947 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 948 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 949 for (i=0;i<maxsize;i++) vals[i] = 1.; 950 951 /* Create vectors for quadrature rules */ 952 /* TODO preserve other quadratures */ 953 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 954 for (i=0;i<nquads;i++) { 955 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 956 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 957 } 958 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 959 960 /* Create change of basis matrix (preallocation can be improved) */ 961 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 962 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 963 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 964 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 965 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 966 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 967 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 968 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 969 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 970 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 971 972 /* Defaults to identity */ 973 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 974 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 975 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 976 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 977 978 for (i=0;i<nee;i++) { 979 Mat Gins = NULL, GKins = NULL; 980 981 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 982 if (Gins && GKins) { 983 PetscScalar *data; 984 const PetscInt *rows,*cols; 985 PetscInt nrh,nch,nrc,ncc; 986 987 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 988 /* H1 */ 989 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 990 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 991 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 992 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 993 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 994 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 995 /* complement */ 996 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 997 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 998 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 999 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1000 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1001 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1002 /* Gins kernel quadratures */ 1003 for (j=0;j<ncc;j++) { 1004 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1005 } 1006 /* H1 average */ 1007 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1008 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1009 } 1010 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1011 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1012 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1013 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1014 } 1015 1016 /* Start assembling */ 1017 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1018 for (i=0;i<nquads;i++) { 1019 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1020 } 1021 1022 /* Free */ 1023 if (fl2g) { 1024 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1025 for (i=0;i<nee;i++) { 1026 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1027 } 1028 ierr = PetscFree(eedges);CHKERRQ(ierr); 1029 } 1030 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1031 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1032 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1033 ierr = PetscFree(extrow);CHKERRQ(ierr); 1034 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1035 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1036 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1037 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1038 ierr = PetscFree(vals);CHKERRQ(ierr); 1039 ierr = PetscFree(extrows);CHKERRQ(ierr); 1040 ierr = PetscFree(extcols);CHKERRQ(ierr); 1041 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1042 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1043 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1044 1045 /* Complete assembling */ 1046 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1047 for (i=0;i<nquads;i++) { 1048 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1049 } 1050 for (i=0;i<nquads;i++) { 1051 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1052 } 1053 ierr = PetscFree(quads);CHKERRQ(ierr); 1054 1055 /* set change of basis */ 1056 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1057 ierr = MatDestroy(&T);CHKERRQ(ierr); 1058 1059 /* set quadratures */ 1060 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1061 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1062 1063 PetscFunctionReturn(0); 1064 } 1065 1066 /* the near-null space of BDDC carries information on quadrature weights, 1067 and these can be collinear -> so cheat with MatNullSpaceCreate 1068 and create a suitable set of basis vectors first */ 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1071 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1072 { 1073 PetscErrorCode ierr; 1074 PetscInt i; 1075 1076 PetscFunctionBegin; 1077 for (i=0;i<nvecs;i++) { 1078 PetscInt first,last; 1079 1080 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1081 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1082 if (i>=first && i < last) { 1083 PetscScalar *data; 1084 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1085 if (!has_const) { 1086 data[i-first] = 1.; 1087 } else { 1088 data[2*i-first] = 1./PetscSqrtReal(2.); 1089 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1090 } 1091 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1092 } 1093 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1094 } 1095 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1096 for (i=0;i<nvecs;i++) { /* reset vectors */ 1097 PetscInt first,last; 1098 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1099 if (i>=first && i < last) { 1100 PetscScalar *data; 1101 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1102 if (!has_const) { 1103 data[i-first] = 0.; 1104 } else { 1105 data[2*i-first] = 0.; 1106 data[2*i-first+1] = 0.; 1107 } 1108 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1109 } 1110 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1117 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1118 { 1119 Mat loc_divudotp; 1120 Vec p,v,vins,quad_vec,*quad_vecs; 1121 ISLocalToGlobalMapping map; 1122 IS *faces,*edges; 1123 PetscScalar *vals; 1124 const PetscScalar *array; 1125 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1126 PetscMPIInt rank; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1131 if (graph->twodim) { 1132 lmaxneighs = 2; 1133 } else { 1134 lmaxneighs = 1; 1135 for (i=0;i<ne;i++) { 1136 const PetscInt *idxs; 1137 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1138 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1139 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1140 } 1141 lmaxneighs++; /* graph count does not include self */ 1142 } 1143 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1144 maxsize = 0; 1145 for (i=0;i<ne;i++) { 1146 PetscInt nn; 1147 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1148 maxsize = PetscMax(maxsize,nn); 1149 } 1150 for (i=0;i<nf;i++) { 1151 PetscInt nn; 1152 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1153 maxsize = PetscMax(maxsize,nn); 1154 } 1155 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1156 /* create vectors to hold quadrature weights */ 1157 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1158 if (!transpose) { 1159 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1160 } else { 1161 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1162 } 1163 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1164 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1165 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1166 for (i=0;i<maxneighs;i++) { 1167 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1168 } 1169 1170 /* compute local quad vec */ 1171 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1172 if (!transpose) { 1173 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1174 } else { 1175 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1176 } 1177 ierr = VecSet(p,1.);CHKERRQ(ierr); 1178 if (!transpose) { 1179 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1180 } else { 1181 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1182 } 1183 if (vl2l) { 1184 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1185 } else { 1186 vins = v; 1187 } 1188 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1189 ierr = VecDestroy(&p);CHKERRQ(ierr); 1190 1191 /* insert in global quadrature vecs */ 1192 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1193 for (i=0;i<nf;i++) { 1194 const PetscInt *idxs; 1195 PetscInt idx,nn,j; 1196 1197 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1198 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1199 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1200 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1201 idx = -(idx+1); 1202 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1203 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1204 } 1205 for (i=0;i<ne;i++) { 1206 const PetscInt *idxs; 1207 PetscInt idx,nn,j; 1208 1209 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1210 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1211 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1212 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1213 idx = -(idx+1); 1214 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1215 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1216 } 1217 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1218 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1219 if (vl2l) { 1220 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1221 } 1222 ierr = VecDestroy(&v);CHKERRQ(ierr); 1223 ierr = PetscFree(vals);CHKERRQ(ierr); 1224 1225 /* assemble near null space */ 1226 for (i=0;i<maxneighs;i++) { 1227 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1228 } 1229 for (i=0;i<maxneighs;i++) { 1230 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1231 } 1232 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1239 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1240 { 1241 PetscErrorCode ierr; 1242 Vec local,global; 1243 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1244 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1245 1246 PetscFunctionBegin; 1247 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1248 /* need to convert from global to local topology information and remove references to information in global ordering */ 1249 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1250 if (pcbddc->user_provided_isfordofs) { 1251 if (pcbddc->n_ISForDofs) { 1252 PetscInt i; 1253 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1254 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1255 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1256 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1257 } 1258 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1259 pcbddc->n_ISForDofs = 0; 1260 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1261 } 1262 } else { 1263 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1264 PetscInt i, n = matis->A->rmap->n; 1265 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1266 if (i > 1) { 1267 pcbddc->n_ISForDofsLocal = i; 1268 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1269 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1270 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1271 } 1272 } 1273 } 1274 } 1275 1276 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1277 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1278 } 1279 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1280 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1281 } 1282 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1283 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1284 } 1285 ierr = VecDestroy(&global);CHKERRQ(ierr); 1286 ierr = VecDestroy(&local);CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1292 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1293 { 1294 PC_IS *pcis = (PC_IS*)(pc->data); 1295 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1296 PetscErrorCode ierr; 1297 1298 PetscFunctionBegin; 1299 if (!pcbddc->benign_have_null) { 1300 PetscFunctionReturn(0); 1301 } 1302 if (pcbddc->ChangeOfBasisMatrix) { 1303 Vec swap; 1304 1305 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1306 swap = pcbddc->work_change; 1307 pcbddc->work_change = r; 1308 r = swap; 1309 } 1310 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1311 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1312 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1313 ierr = VecSet(z,0.);CHKERRQ(ierr); 1314 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1315 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1316 if (pcbddc->ChangeOfBasisMatrix) { 1317 pcbddc->work_change = r; 1318 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1319 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1320 } 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1326 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1327 { 1328 PCBDDCBenignMatMult_ctx ctx; 1329 PetscErrorCode ierr; 1330 PetscBool apply_right,apply_left,reset_x; 1331 1332 PetscFunctionBegin; 1333 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1334 if (transpose) { 1335 apply_right = ctx->apply_left; 1336 apply_left = ctx->apply_right; 1337 } else { 1338 apply_right = ctx->apply_right; 1339 apply_left = ctx->apply_left; 1340 } 1341 reset_x = PETSC_FALSE; 1342 if (apply_right) { 1343 const PetscScalar *ax; 1344 PetscInt nl,i; 1345 1346 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1347 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1348 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1349 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1350 for (i=0;i<ctx->benign_n;i++) { 1351 PetscScalar sum,val; 1352 const PetscInt *idxs; 1353 PetscInt nz,j; 1354 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1355 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1356 sum = 0.; 1357 if (ctx->apply_p0) { 1358 val = ctx->work[idxs[nz-1]]; 1359 for (j=0;j<nz-1;j++) { 1360 sum += ctx->work[idxs[j]]; 1361 ctx->work[idxs[j]] += val; 1362 } 1363 } else { 1364 for (j=0;j<nz-1;j++) { 1365 sum += ctx->work[idxs[j]]; 1366 } 1367 } 1368 ctx->work[idxs[nz-1]] -= sum; 1369 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1370 } 1371 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1372 reset_x = PETSC_TRUE; 1373 } 1374 if (transpose) { 1375 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1376 } else { 1377 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1378 } 1379 if (reset_x) { 1380 ierr = VecResetArray(x);CHKERRQ(ierr); 1381 } 1382 if (apply_left) { 1383 PetscScalar *ay; 1384 PetscInt i; 1385 1386 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1387 for (i=0;i<ctx->benign_n;i++) { 1388 PetscScalar sum,val; 1389 const PetscInt *idxs; 1390 PetscInt nz,j; 1391 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1392 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1393 val = -ay[idxs[nz-1]]; 1394 if (ctx->apply_p0) { 1395 sum = 0.; 1396 for (j=0;j<nz-1;j++) { 1397 sum += ay[idxs[j]]; 1398 ay[idxs[j]] += val; 1399 } 1400 ay[idxs[nz-1]] += sum; 1401 } else { 1402 for (j=0;j<nz-1;j++) { 1403 ay[idxs[j]] += val; 1404 } 1405 ay[idxs[nz-1]] = 0.; 1406 } 1407 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1408 } 1409 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1416 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1417 { 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1427 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1428 { 1429 PetscErrorCode ierr; 1430 1431 PetscFunctionBegin; 1432 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "PCBDDCBenignShellMat" 1438 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1439 { 1440 PC_IS *pcis = (PC_IS*)pc->data; 1441 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1442 PCBDDCBenignMatMult_ctx ctx; 1443 PetscErrorCode ierr; 1444 1445 PetscFunctionBegin; 1446 if (!restore) { 1447 Mat A_IB,A_BI; 1448 PetscScalar *work; 1449 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1450 1451 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1452 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1453 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1454 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1455 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1456 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1457 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1458 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1459 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1460 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1461 ctx->apply_left = PETSC_TRUE; 1462 ctx->apply_right = PETSC_FALSE; 1463 ctx->apply_p0 = PETSC_FALSE; 1464 ctx->benign_n = pcbddc->benign_n; 1465 if (reuse) { 1466 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1467 ctx->free = PETSC_FALSE; 1468 } else { /* TODO: could be optimized for successive solves */ 1469 ISLocalToGlobalMapping N_to_D; 1470 PetscInt i; 1471 1472 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1473 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1474 for (i=0;i<pcbddc->benign_n;i++) { 1475 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1476 } 1477 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1478 ctx->free = PETSC_TRUE; 1479 } 1480 ctx->A = pcis->A_IB; 1481 ctx->work = work; 1482 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1483 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1484 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1485 pcis->A_IB = A_IB; 1486 1487 /* A_BI as A_IB^T */ 1488 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1489 pcbddc->benign_original_mat = pcis->A_BI; 1490 pcis->A_BI = A_BI; 1491 } else { 1492 if (!pcbddc->benign_original_mat) { 1493 PetscFunctionReturn(0); 1494 } 1495 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1496 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1497 pcis->A_IB = ctx->A; 1498 ctx->A = NULL; 1499 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1500 pcis->A_BI = pcbddc->benign_original_mat; 1501 pcbddc->benign_original_mat = NULL; 1502 if (ctx->free) { 1503 PetscInt i; 1504 for (i=0;i<ctx->benign_n;i++) { 1505 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1506 } 1507 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1508 } 1509 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1510 ierr = PetscFree(ctx);CHKERRQ(ierr); 1511 } 1512 PetscFunctionReturn(0); 1513 } 1514 1515 /* used just in bddc debug mode */ 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "PCBDDCBenignProject" 1518 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1519 { 1520 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1521 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1522 Mat An; 1523 PetscErrorCode ierr; 1524 1525 PetscFunctionBegin; 1526 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1527 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1528 if (is1) { 1529 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1530 ierr = MatDestroy(&An);CHKERRQ(ierr); 1531 } else { 1532 *B = An; 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 /* TODO: add reuse flag */ 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatSeqAIJCompress" 1540 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1541 { 1542 Mat Bt; 1543 PetscScalar *a,*bdata; 1544 const PetscInt *ii,*ij; 1545 PetscInt m,n,i,nnz,*bii,*bij; 1546 PetscBool flg_row; 1547 PetscErrorCode ierr; 1548 1549 PetscFunctionBegin; 1550 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1551 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1552 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1553 nnz = n; 1554 for (i=0;i<ii[n];i++) { 1555 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1556 } 1557 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1558 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1559 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1560 nnz = 0; 1561 bii[0] = 0; 1562 for (i=0;i<n;i++) { 1563 PetscInt j; 1564 for (j=ii[i];j<ii[i+1];j++) { 1565 PetscScalar entry = a[j]; 1566 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1567 bij[nnz] = ij[j]; 1568 bdata[nnz] = entry; 1569 nnz++; 1570 } 1571 } 1572 bii[i+1] = nnz; 1573 } 1574 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1575 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1576 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1577 { 1578 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1579 b->free_a = PETSC_TRUE; 1580 b->free_ij = PETSC_TRUE; 1581 } 1582 *B = Bt; 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatDetectDisconnectedComponents" 1588 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1589 { 1590 Mat B; 1591 IS is_dummy,*cc_n; 1592 ISLocalToGlobalMapping l2gmap_dummy; 1593 PCBDDCGraph graph; 1594 PetscInt i,n; 1595 PetscInt *xadj,*adjncy; 1596 PetscInt *xadj_filtered,*adjncy_filtered; 1597 PetscBool flg_row,isseqaij; 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 if (!A->rmap->N || !A->cmap->N) { 1602 *ncc = 0; 1603 *cc = NULL; 1604 PetscFunctionReturn(0); 1605 } 1606 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1607 if (!isseqaij && filter) { 1608 PetscBool isseqdense; 1609 1610 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1611 if (!isseqdense) { 1612 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1613 } else { /* TODO: rectangular case and LDA */ 1614 PetscScalar *array; 1615 PetscReal chop=1.e-6; 1616 1617 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1618 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1619 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1620 for (i=0;i<n;i++) { 1621 PetscInt j; 1622 for (j=i+1;j<n;j++) { 1623 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1624 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1625 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1626 } 1627 } 1628 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1629 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1630 } 1631 } else { 1632 B = A; 1633 } 1634 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1635 1636 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1637 if (filter) { 1638 PetscScalar *data; 1639 PetscInt j,cum; 1640 1641 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1642 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1643 cum = 0; 1644 for (i=0;i<n;i++) { 1645 PetscInt t; 1646 1647 for (j=xadj[i];j<xadj[i+1];j++) { 1648 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1649 continue; 1650 } 1651 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1652 } 1653 t = xadj_filtered[i]; 1654 xadj_filtered[i] = cum; 1655 cum += t; 1656 } 1657 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1658 } else { 1659 xadj_filtered = NULL; 1660 adjncy_filtered = NULL; 1661 } 1662 1663 /* compute local connected components using PCBDDCGraph */ 1664 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1665 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1666 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1667 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1668 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1669 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1670 if (xadj_filtered) { 1671 graph->xadj = xadj_filtered; 1672 graph->adjncy = adjncy_filtered; 1673 } else { 1674 graph->xadj = xadj; 1675 graph->adjncy = adjncy; 1676 } 1677 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1678 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1679 /* partial clean up */ 1680 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1681 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1682 if (A != B) { 1683 ierr = MatDestroy(&B);CHKERRQ(ierr); 1684 } 1685 1686 /* get back data */ 1687 if (ncc) *ncc = graph->ncc; 1688 if (cc) { 1689 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1690 for (i=0;i<graph->ncc;i++) { 1691 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1692 } 1693 *cc = cc_n; 1694 } 1695 /* clean up graph */ 1696 graph->xadj = 0; 1697 graph->adjncy = 0; 1698 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "PCBDDCBenignCheck" 1704 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1705 { 1706 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1707 PC_IS* pcis = (PC_IS*)(pc->data); 1708 IS dirIS = NULL; 1709 PetscInt i; 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1714 if (zerodiag) { 1715 Mat A; 1716 Vec vec3_N; 1717 PetscScalar *vals; 1718 const PetscInt *idxs; 1719 PetscInt nz,*count; 1720 1721 /* p0 */ 1722 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1723 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1724 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1725 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1726 for (i=0;i<nz;i++) vals[i] = 1.; 1727 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1728 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1729 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1730 /* v_I */ 1731 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1732 for (i=0;i<nz;i++) vals[i] = 0.; 1733 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1734 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1735 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1736 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1737 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1738 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1739 if (dirIS) { 1740 PetscInt n; 1741 1742 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1743 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1744 for (i=0;i<n;i++) vals[i] = 0.; 1745 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1746 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1747 } 1748 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1749 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1750 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1751 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1752 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1753 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1754 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1755 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1756 ierr = PetscFree(vals);CHKERRQ(ierr); 1757 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1758 1759 /* there should not be any pressure dofs lying on the interface */ 1760 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1761 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1762 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1763 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1764 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1765 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1766 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1767 ierr = PetscFree(count);CHKERRQ(ierr); 1768 } 1769 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1770 1771 /* check PCBDDCBenignGetOrSetP0 */ 1772 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1773 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1774 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1775 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1776 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1777 for (i=0;i<pcbddc->benign_n;i++) { 1778 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1779 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1786 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1787 { 1788 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1789 IS pressures,zerodiag,*zerodiag_subs; 1790 PetscInt nz,n; 1791 PetscInt *interior_dofs,n_interior_dofs; 1792 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1793 PetscErrorCode ierr; 1794 1795 PetscFunctionBegin; 1796 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1797 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1798 for (n=0;n<pcbddc->benign_n;n++) { 1799 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1800 } 1801 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1802 pcbddc->benign_n = 0; 1803 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1804 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1805 Checks if all the pressure dofs in each subdomain have a zero diagonal 1806 If not, a change of basis on pressures is not needed 1807 since the local Schur complements are already SPD 1808 */ 1809 has_null_pressures = PETSC_TRUE; 1810 have_null = PETSC_TRUE; 1811 if (pcbddc->n_ISForDofsLocal) { 1812 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1813 1814 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1815 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1816 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1817 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1818 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1819 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1820 if (!sorted) { 1821 ierr = ISSort(pressures);CHKERRQ(ierr); 1822 } 1823 } else { 1824 pressures = NULL; 1825 } 1826 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1827 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1828 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1829 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1830 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1831 if (!sorted) { 1832 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1833 } 1834 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1835 if (!nz) { 1836 if (n) have_null = PETSC_FALSE; 1837 has_null_pressures = PETSC_FALSE; 1838 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1839 } 1840 recompute_zerodiag = PETSC_FALSE; 1841 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1842 zerodiag_subs = NULL; 1843 pcbddc->benign_n = 0; 1844 n_interior_dofs = 0; 1845 interior_dofs = NULL; 1846 if (pcbddc->current_level) { /* need to compute interior nodes */ 1847 PetscInt n,i,j; 1848 PetscInt n_neigh,*neigh,*n_shared,**shared; 1849 PetscInt *iwork; 1850 1851 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1852 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1853 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1854 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1855 for (i=1;i<n_neigh;i++) 1856 for (j=0;j<n_shared[i];j++) 1857 iwork[shared[i][j]] += 1; 1858 for (i=0;i<n;i++) 1859 if (!iwork[i]) 1860 interior_dofs[n_interior_dofs++] = i; 1861 ierr = PetscFree(iwork);CHKERRQ(ierr); 1862 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1863 } 1864 if (has_null_pressures) { 1865 IS *subs; 1866 PetscInt nsubs,i,j,nl; 1867 const PetscInt *idxs; 1868 PetscScalar *array; 1869 Vec *work; 1870 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1871 1872 subs = pcbddc->local_subs; 1873 nsubs = pcbddc->n_local_subs; 1874 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1875 if (pcbddc->current_level) { 1876 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1877 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1878 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1879 /* work[0] = 1_p */ 1880 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1881 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1882 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1883 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1884 /* work[0] = 1_v */ 1885 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1886 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1887 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1888 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1889 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1890 } 1891 if (nsubs > 1) { 1892 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1893 for (i=0;i<nsubs;i++) { 1894 ISLocalToGlobalMapping l2g; 1895 IS t_zerodiag_subs; 1896 PetscInt nl; 1897 1898 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1899 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1900 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1901 if (nl) { 1902 PetscBool valid = PETSC_TRUE; 1903 1904 if (pcbddc->current_level) { 1905 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1906 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1907 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1908 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1909 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1910 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1911 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1912 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1913 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1914 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1915 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1916 for (j=0;j<n_interior_dofs;j++) { 1917 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1918 valid = PETSC_FALSE; 1919 break; 1920 } 1921 } 1922 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1923 } 1924 if (valid && pcbddc->NeumannBoundariesLocal) { 1925 IS t_bc; 1926 PetscInt nzb; 1927 1928 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1929 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1930 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1931 if (nzb) valid = PETSC_FALSE; 1932 } 1933 if (valid && pressures) { 1934 IS t_pressure_subs; 1935 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1936 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1937 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1938 } 1939 if (valid) { 1940 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1941 pcbddc->benign_n++; 1942 } else { 1943 recompute_zerodiag = PETSC_TRUE; 1944 } 1945 } 1946 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1947 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1948 } 1949 } else { /* there's just one subdomain (or zero if they have not been detected */ 1950 PetscBool valid = PETSC_TRUE; 1951 1952 if (pcbddc->NeumannBoundariesLocal) { 1953 PetscInt nzb; 1954 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1955 if (nzb) valid = PETSC_FALSE; 1956 } 1957 if (valid && pressures) { 1958 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1959 } 1960 if (valid && pcbddc->current_level) { 1961 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1962 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1963 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1964 for (j=0;j<n_interior_dofs;j++) { 1965 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1966 valid = PETSC_FALSE; 1967 break; 1968 } 1969 } 1970 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1971 } 1972 if (valid) { 1973 pcbddc->benign_n = 1; 1974 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1975 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1976 zerodiag_subs[0] = zerodiag; 1977 } 1978 } 1979 if (pcbddc->current_level) { 1980 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1981 } 1982 } 1983 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1984 1985 if (!pcbddc->benign_n) { 1986 PetscInt n; 1987 1988 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1989 recompute_zerodiag = PETSC_FALSE; 1990 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1991 if (n) { 1992 has_null_pressures = PETSC_FALSE; 1993 have_null = PETSC_FALSE; 1994 } 1995 } 1996 1997 /* final check for null pressures */ 1998 if (zerodiag && pressures) { 1999 PetscInt nz,np; 2000 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2001 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2002 if (nz != np) have_null = PETSC_FALSE; 2003 } 2004 2005 if (recompute_zerodiag) { 2006 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2007 if (pcbddc->benign_n == 1) { 2008 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2009 zerodiag = zerodiag_subs[0]; 2010 } else { 2011 PetscInt i,nzn,*new_idxs; 2012 2013 nzn = 0; 2014 for (i=0;i<pcbddc->benign_n;i++) { 2015 PetscInt ns; 2016 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2017 nzn += ns; 2018 } 2019 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2020 nzn = 0; 2021 for (i=0;i<pcbddc->benign_n;i++) { 2022 PetscInt ns,*idxs; 2023 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2024 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2025 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2026 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2027 nzn += ns; 2028 } 2029 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2030 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2031 } 2032 have_null = PETSC_FALSE; 2033 } 2034 2035 /* Prepare matrix to compute no-net-flux */ 2036 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2037 Mat A,loc_divudotp; 2038 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2039 IS row,col,isused = NULL; 2040 PetscInt M,N,n,st,n_isused; 2041 2042 if (pressures) { 2043 isused = pressures; 2044 } else { 2045 isused = zerodiag; 2046 } 2047 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2048 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2049 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2050 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2051 n_isused = 0; 2052 if (isused) { 2053 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2054 } 2055 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2056 st = st-n_isused; 2057 if (n) { 2058 const PetscInt *gidxs; 2059 2060 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2061 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2062 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2063 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2064 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2065 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2066 } else { 2067 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2068 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2069 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2070 } 2071 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2072 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2073 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2074 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2075 ierr = ISDestroy(&row);CHKERRQ(ierr); 2076 ierr = ISDestroy(&col);CHKERRQ(ierr); 2077 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2078 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2079 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2080 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2081 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2082 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2083 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2084 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2085 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2086 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 } 2088 2089 /* change of basis and p0 dofs */ 2090 if (has_null_pressures) { 2091 IS zerodiagc; 2092 const PetscInt *idxs,*idxsc; 2093 PetscInt i,s,*nnz; 2094 2095 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2096 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2097 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2098 /* local change of basis for pressures */ 2099 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2100 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2101 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2102 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2103 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2104 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2105 for (i=0;i<pcbddc->benign_n;i++) { 2106 PetscInt nzs,j; 2107 2108 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2109 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2110 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2111 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2112 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2113 } 2114 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2115 ierr = PetscFree(nnz);CHKERRQ(ierr); 2116 /* set identity on velocities */ 2117 for (i=0;i<n-nz;i++) { 2118 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2119 } 2120 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2121 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2122 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2123 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2124 /* set change on pressures */ 2125 for (s=0;s<pcbddc->benign_n;s++) { 2126 PetscScalar *array; 2127 PetscInt nzs; 2128 2129 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2130 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2131 for (i=0;i<nzs-1;i++) { 2132 PetscScalar vals[2]; 2133 PetscInt cols[2]; 2134 2135 cols[0] = idxs[i]; 2136 cols[1] = idxs[nzs-1]; 2137 vals[0] = 1.; 2138 vals[1] = 1.; 2139 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2140 } 2141 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2142 for (i=0;i<nzs-1;i++) array[i] = -1.; 2143 array[nzs-1] = 1.; 2144 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2145 /* store local idxs for p0 */ 2146 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2147 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2148 ierr = PetscFree(array);CHKERRQ(ierr); 2149 } 2150 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2151 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2152 /* project if needed */ 2153 if (pcbddc->benign_change_explicit) { 2154 Mat M; 2155 2156 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2157 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2158 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2159 ierr = MatDestroy(&M);CHKERRQ(ierr); 2160 } 2161 /* store global idxs for p0 */ 2162 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2163 } 2164 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2165 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2166 2167 /* determines if the coarse solver will be singular or not */ 2168 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2169 /* determines if the problem has subdomains with 0 pressure block */ 2170 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2171 *zerodiaglocal = zerodiag; 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2177 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2178 { 2179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2180 PetscScalar *array; 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 if (!pcbddc->benign_sf) { 2185 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2186 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2187 } 2188 if (get) { 2189 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2190 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2191 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2192 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2193 } else { 2194 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2195 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2196 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2197 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2198 } 2199 PetscFunctionReturn(0); 2200 } 2201 2202 #undef __FUNCT__ 2203 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2204 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2205 { 2206 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2207 PetscErrorCode ierr; 2208 2209 PetscFunctionBegin; 2210 /* TODO: add error checking 2211 - avoid nested pop (or push) calls. 2212 - cannot push before pop. 2213 - cannot call this if pcbddc->local_mat is NULL 2214 */ 2215 if (!pcbddc->benign_n) { 2216 PetscFunctionReturn(0); 2217 } 2218 if (pop) { 2219 if (pcbddc->benign_change_explicit) { 2220 IS is_p0; 2221 MatReuse reuse; 2222 2223 /* extract B_0 */ 2224 reuse = MAT_INITIAL_MATRIX; 2225 if (pcbddc->benign_B0) { 2226 reuse = MAT_REUSE_MATRIX; 2227 } 2228 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2229 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2230 /* remove rows and cols from local problem */ 2231 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2232 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2233 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2234 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2235 } else { 2236 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2237 PetscScalar *vals; 2238 PetscInt i,n,*idxs_ins; 2239 2240 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2241 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2242 if (!pcbddc->benign_B0) { 2243 PetscInt *nnz; 2244 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2245 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2246 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2247 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2248 for (i=0;i<pcbddc->benign_n;i++) { 2249 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2250 nnz[i] = n - nnz[i]; 2251 } 2252 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2253 ierr = PetscFree(nnz);CHKERRQ(ierr); 2254 } 2255 2256 for (i=0;i<pcbddc->benign_n;i++) { 2257 PetscScalar *array; 2258 PetscInt *idxs,j,nz,cum; 2259 2260 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2261 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2262 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2263 for (j=0;j<nz;j++) vals[j] = 1.; 2264 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2265 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2266 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2267 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2268 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2269 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2270 cum = 0; 2271 for (j=0;j<n;j++) { 2272 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2273 vals[cum] = array[j]; 2274 idxs_ins[cum] = j; 2275 cum++; 2276 } 2277 } 2278 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2279 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2280 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2281 } 2282 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2283 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2284 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2285 } 2286 } else { /* push */ 2287 if (pcbddc->benign_change_explicit) { 2288 PetscInt i; 2289 2290 for (i=0;i<pcbddc->benign_n;i++) { 2291 PetscScalar *B0_vals; 2292 PetscInt *B0_cols,B0_ncol; 2293 2294 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2295 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2296 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2297 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2298 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2299 } 2300 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2301 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2302 } else { 2303 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2304 } 2305 } 2306 PetscFunctionReturn(0); 2307 } 2308 2309 #undef __FUNCT__ 2310 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2311 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2312 { 2313 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2314 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2315 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2316 PetscBLASInt *B_iwork,*B_ifail; 2317 PetscScalar *work,lwork; 2318 PetscScalar *St,*S,*eigv; 2319 PetscScalar *Sarray,*Starray; 2320 PetscReal *eigs,thresh; 2321 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2322 PetscBool allocated_S_St; 2323 #if defined(PETSC_USE_COMPLEX) 2324 PetscReal *rwork; 2325 #endif 2326 PetscErrorCode ierr; 2327 2328 PetscFunctionBegin; 2329 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2330 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2331 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2332 2333 if (pcbddc->dbg_flag) { 2334 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2335 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2336 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2337 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2338 } 2339 2340 if (pcbddc->dbg_flag) { 2341 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2342 } 2343 2344 /* max size of subsets */ 2345 mss = 0; 2346 for (i=0;i<sub_schurs->n_subs;i++) { 2347 PetscInt subset_size; 2348 2349 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2350 mss = PetscMax(mss,subset_size); 2351 } 2352 2353 /* min/max and threshold */ 2354 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2355 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2356 nmax = PetscMax(nmin,nmax); 2357 allocated_S_St = PETSC_FALSE; 2358 if (nmin) { 2359 allocated_S_St = PETSC_TRUE; 2360 } 2361 2362 /* allocate lapack workspace */ 2363 cum = cum2 = 0; 2364 maxneigs = 0; 2365 for (i=0;i<sub_schurs->n_subs;i++) { 2366 PetscInt n,subset_size; 2367 2368 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2369 n = PetscMin(subset_size,nmax); 2370 cum += subset_size; 2371 cum2 += subset_size*n; 2372 maxneigs = PetscMax(maxneigs,n); 2373 } 2374 if (mss) { 2375 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2376 PetscBLASInt B_itype = 1; 2377 PetscBLASInt B_N = mss; 2378 PetscReal zero = 0.0; 2379 PetscReal eps = 0.0; /* dlamch? */ 2380 2381 B_lwork = -1; 2382 S = NULL; 2383 St = NULL; 2384 eigs = NULL; 2385 eigv = NULL; 2386 B_iwork = NULL; 2387 B_ifail = NULL; 2388 #if defined(PETSC_USE_COMPLEX) 2389 rwork = NULL; 2390 #endif 2391 thresh = 1.0; 2392 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2393 #if defined(PETSC_USE_COMPLEX) 2394 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2395 #else 2396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2397 #endif 2398 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2399 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2400 } else { 2401 /* TODO */ 2402 } 2403 } else { 2404 lwork = 0; 2405 } 2406 2407 nv = 0; 2408 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2409 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2410 } 2411 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2412 if (allocated_S_St) { 2413 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2414 } 2415 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2416 #if defined(PETSC_USE_COMPLEX) 2417 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2418 #endif 2419 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2420 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2421 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2422 nv+cum,&pcbddc->adaptive_constraints_idxs, 2423 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2424 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2425 2426 maxneigs = 0; 2427 cum = cumarray = 0; 2428 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2429 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2430 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2431 const PetscInt *idxs; 2432 2433 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2434 for (cum=0;cum<nv;cum++) { 2435 pcbddc->adaptive_constraints_n[cum] = 1; 2436 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2437 pcbddc->adaptive_constraints_data[cum] = 1.0; 2438 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2439 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2440 } 2441 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2442 } 2443 2444 if (mss) { /* multilevel */ 2445 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2446 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2447 } 2448 2449 thresh = pcbddc->adaptive_threshold; 2450 for (i=0;i<sub_schurs->n_subs;i++) { 2451 const PetscInt *idxs; 2452 PetscReal upper,lower; 2453 PetscInt j,subset_size,eigs_start = 0; 2454 PetscBLASInt B_N; 2455 PetscBool same_data = PETSC_FALSE; 2456 2457 if (pcbddc->use_deluxe_scaling) { 2458 upper = PETSC_MAX_REAL; 2459 lower = thresh; 2460 } else { 2461 upper = 1./thresh; 2462 lower = 0.; 2463 } 2464 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2465 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2466 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2467 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2468 if (sub_schurs->is_hermitian) { 2469 PetscInt j,k; 2470 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2471 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2472 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2473 } 2474 for (j=0;j<subset_size;j++) { 2475 for (k=j;k<subset_size;k++) { 2476 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2477 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2478 } 2479 } 2480 } else { 2481 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2482 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2483 } 2484 } else { 2485 S = Sarray + cumarray; 2486 St = Starray + cumarray; 2487 } 2488 /* see if we can save some work */ 2489 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2490 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2491 } 2492 2493 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2494 B_neigs = 0; 2495 } else { 2496 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2497 PetscBLASInt B_itype = 1; 2498 PetscBLASInt B_IL, B_IU; 2499 PetscReal eps = -1.0; /* dlamch? */ 2500 PetscInt nmin_s; 2501 PetscBool compute_range = PETSC_FALSE; 2502 2503 if (pcbddc->dbg_flag) { 2504 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2505 } 2506 2507 compute_range = PETSC_FALSE; 2508 if (thresh > 1.+PETSC_SMALL && !same_data) { 2509 compute_range = PETSC_TRUE; 2510 } 2511 2512 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2513 if (compute_range) { 2514 2515 /* ask for eigenvalues larger than thresh */ 2516 #if defined(PETSC_USE_COMPLEX) 2517 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2518 #else 2519 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2520 #endif 2521 } else if (!same_data) { 2522 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2523 B_IL = 1; 2524 #if defined(PETSC_USE_COMPLEX) 2525 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2526 #else 2527 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2528 #endif 2529 } else { /* same_data is true, so get the adaptive function requested by the user */ 2530 PetscInt k; 2531 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2532 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2533 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2534 nmin = nmax; 2535 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2536 for (k=0;k<nmax;k++) { 2537 eigs[k] = 1./PETSC_SMALL; 2538 eigv[k*(subset_size+1)] = 1.0; 2539 } 2540 } 2541 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2542 if (B_ierr) { 2543 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2544 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2545 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2546 } 2547 2548 if (B_neigs > nmax) { 2549 if (pcbddc->dbg_flag) { 2550 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2551 } 2552 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2553 B_neigs = nmax; 2554 } 2555 2556 nmin_s = PetscMin(nmin,B_N); 2557 if (B_neigs < nmin_s) { 2558 PetscBLASInt B_neigs2; 2559 2560 if (pcbddc->use_deluxe_scaling) { 2561 B_IL = B_N - nmin_s + 1; 2562 B_IU = B_N - B_neigs; 2563 } else { 2564 B_IL = B_neigs + 1; 2565 B_IU = nmin_s; 2566 } 2567 if (pcbddc->dbg_flag) { 2568 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2569 } 2570 if (sub_schurs->is_hermitian) { 2571 PetscInt j,k; 2572 for (j=0;j<subset_size;j++) { 2573 for (k=j;k<subset_size;k++) { 2574 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2575 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2576 } 2577 } 2578 } else { 2579 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2580 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2581 } 2582 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2583 #if defined(PETSC_USE_COMPLEX) 2584 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2585 #else 2586 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2587 #endif 2588 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2589 B_neigs += B_neigs2; 2590 } 2591 if (B_ierr) { 2592 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2593 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2594 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2595 } 2596 if (pcbddc->dbg_flag) { 2597 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2598 for (j=0;j<B_neigs;j++) { 2599 if (eigs[j] == 0.0) { 2600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2601 } else { 2602 if (pcbddc->use_deluxe_scaling) { 2603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2604 } else { 2605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2606 } 2607 } 2608 } 2609 } 2610 } else { 2611 /* TODO */ 2612 } 2613 } 2614 /* change the basis back to the original one */ 2615 if (sub_schurs->change) { 2616 Mat change,phi,phit; 2617 2618 if (pcbddc->dbg_flag > 1) { 2619 PetscInt ii; 2620 for (ii=0;ii<B_neigs;ii++) { 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2622 for (j=0;j<B_N;j++) { 2623 #if defined(PETSC_USE_COMPLEX) 2624 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2625 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2627 #else 2628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2629 #endif 2630 } 2631 } 2632 } 2633 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2634 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2635 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2636 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2637 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2638 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2639 } 2640 maxneigs = PetscMax(B_neigs,maxneigs); 2641 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2642 if (B_neigs) { 2643 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2644 2645 if (pcbddc->dbg_flag > 1) { 2646 PetscInt ii; 2647 for (ii=0;ii<B_neigs;ii++) { 2648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2649 for (j=0;j<B_N;j++) { 2650 #if defined(PETSC_USE_COMPLEX) 2651 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2652 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2653 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2654 #else 2655 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2656 #endif 2657 } 2658 } 2659 } 2660 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2661 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2662 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2663 cum++; 2664 } 2665 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2666 /* shift for next computation */ 2667 cumarray += subset_size*subset_size; 2668 } 2669 if (pcbddc->dbg_flag) { 2670 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2671 } 2672 2673 if (mss) { 2674 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2675 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2676 /* destroy matrices (junk) */ 2677 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2678 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2679 } 2680 if (allocated_S_St) { 2681 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2682 } 2683 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2684 #if defined(PETSC_USE_COMPLEX) 2685 ierr = PetscFree(rwork);CHKERRQ(ierr); 2686 #endif 2687 if (pcbddc->dbg_flag) { 2688 PetscInt maxneigs_r; 2689 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2690 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2691 } 2692 PetscFunctionReturn(0); 2693 } 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "PCBDDCSetUpSolvers" 2697 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2698 { 2699 PetscScalar *coarse_submat_vals; 2700 PetscErrorCode ierr; 2701 2702 PetscFunctionBegin; 2703 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2704 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2705 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2706 2707 /* Setup local neumann solver ksp_R */ 2708 /* PCBDDCSetUpLocalScatters should be called first! */ 2709 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2710 2711 /* 2712 Setup local correction and local part of coarse basis. 2713 Gives back the dense local part of the coarse matrix in column major ordering 2714 */ 2715 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2716 2717 /* Compute total number of coarse nodes and setup coarse solver */ 2718 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2719 2720 /* free */ 2721 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2722 PetscFunctionReturn(0); 2723 } 2724 2725 #undef __FUNCT__ 2726 #define __FUNCT__ "PCBDDCResetCustomization" 2727 PetscErrorCode PCBDDCResetCustomization(PC pc) 2728 { 2729 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2730 PetscErrorCode ierr; 2731 2732 PetscFunctionBegin; 2733 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2734 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2735 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2736 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2737 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2738 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2739 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2740 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2741 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2742 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 2746 #undef __FUNCT__ 2747 #define __FUNCT__ "PCBDDCResetTopography" 2748 PetscErrorCode PCBDDCResetTopography(PC pc) 2749 { 2750 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2751 PetscInt i; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2756 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2757 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2758 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2759 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2760 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2761 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2762 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2763 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2764 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2765 for (i=0;i<pcbddc->n_local_subs;i++) { 2766 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2767 } 2768 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2769 if (pcbddc->sub_schurs) { 2770 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2771 } 2772 pcbddc->graphanalyzed = PETSC_FALSE; 2773 pcbddc->recompute_topography = PETSC_TRUE; 2774 PetscFunctionReturn(0); 2775 } 2776 2777 #undef __FUNCT__ 2778 #define __FUNCT__ "PCBDDCResetSolvers" 2779 PetscErrorCode PCBDDCResetSolvers(PC pc) 2780 { 2781 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2782 PetscErrorCode ierr; 2783 2784 PetscFunctionBegin; 2785 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2786 if (pcbddc->coarse_phi_B) { 2787 PetscScalar *array; 2788 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2789 ierr = PetscFree(array);CHKERRQ(ierr); 2790 } 2791 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2792 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2793 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2794 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2795 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2796 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2797 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2798 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2799 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2800 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2801 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2802 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2803 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2804 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2805 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2806 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2807 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2808 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2809 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2810 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2811 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2812 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2813 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2814 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2815 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2816 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2817 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2818 if (pcbddc->benign_zerodiag_subs) { 2819 PetscInt i; 2820 for (i=0;i<pcbddc->benign_n;i++) { 2821 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2822 } 2823 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2824 } 2825 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2826 PetscFunctionReturn(0); 2827 } 2828 2829 #undef __FUNCT__ 2830 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2831 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2832 { 2833 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2834 PC_IS *pcis = (PC_IS*)pc->data; 2835 VecType impVecType; 2836 PetscInt n_constraints,n_R,old_size; 2837 PetscErrorCode ierr; 2838 2839 PetscFunctionBegin; 2840 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2841 /* get sizes */ 2842 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2843 n_R = pcis->n - pcbddc->n_vertices; 2844 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2845 /* local work vectors (try to avoid unneeded work)*/ 2846 /* R nodes */ 2847 old_size = -1; 2848 if (pcbddc->vec1_R) { 2849 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2850 } 2851 if (n_R != old_size) { 2852 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2853 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2854 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2855 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2856 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2857 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2858 } 2859 /* local primal dofs */ 2860 old_size = -1; 2861 if (pcbddc->vec1_P) { 2862 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2863 } 2864 if (pcbddc->local_primal_size != old_size) { 2865 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2866 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2867 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2868 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2869 } 2870 /* local explicit constraints */ 2871 old_size = -1; 2872 if (pcbddc->vec1_C) { 2873 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2874 } 2875 if (n_constraints && n_constraints != old_size) { 2876 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2877 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2878 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2879 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2880 } 2881 PetscFunctionReturn(0); 2882 } 2883 2884 #undef __FUNCT__ 2885 #define __FUNCT__ "PCBDDCSetUpCorrection" 2886 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2887 { 2888 PetscErrorCode ierr; 2889 /* pointers to pcis and pcbddc */ 2890 PC_IS* pcis = (PC_IS*)pc->data; 2891 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2892 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2893 /* submatrices of local problem */ 2894 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2895 /* submatrices of local coarse problem */ 2896 Mat S_VV,S_CV,S_VC,S_CC; 2897 /* working matrices */ 2898 Mat C_CR; 2899 /* additional working stuff */ 2900 PC pc_R; 2901 Mat F; 2902 Vec dummy_vec; 2903 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2904 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2905 PetscScalar *work; 2906 PetscInt *idx_V_B; 2907 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2908 PetscInt i,n_R,n_D,n_B; 2909 2910 /* some shortcuts to scalars */ 2911 PetscScalar one=1.0,m_one=-1.0; 2912 2913 PetscFunctionBegin; 2914 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2915 2916 /* Set Non-overlapping dimensions */ 2917 n_vertices = pcbddc->n_vertices; 2918 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2919 n_B = pcis->n_B; 2920 n_D = pcis->n - n_B; 2921 n_R = pcis->n - n_vertices; 2922 2923 /* vertices in boundary numbering */ 2924 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2925 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2926 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2927 2928 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2929 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2930 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2931 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2932 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2933 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2935 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2936 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2937 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2938 2939 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2940 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2941 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2942 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2943 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2944 lda_rhs = n_R; 2945 need_benign_correction = PETSC_FALSE; 2946 if (isLU || isILU || isCHOL) { 2947 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2948 } else if (sub_schurs && sub_schurs->reuse_solver) { 2949 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2950 MatFactorType type; 2951 2952 F = reuse_solver->F; 2953 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2954 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2955 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2956 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2957 } else { 2958 F = NULL; 2959 } 2960 2961 /* allocate workspace */ 2962 n = 0; 2963 if (n_constraints) { 2964 n += lda_rhs*n_constraints; 2965 } 2966 if (n_vertices) { 2967 n = PetscMax(2*lda_rhs*n_vertices,n); 2968 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2969 } 2970 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2971 2972 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2973 dummy_vec = NULL; 2974 if (need_benign_correction && lda_rhs != n_R && F) { 2975 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2976 } 2977 2978 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2979 if (n_constraints) { 2980 Mat M1,M2,M3,C_B; 2981 IS is_aux; 2982 PetscScalar *array,*array2; 2983 2984 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2985 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2986 2987 /* Extract constraints on R nodes: C_{CR} */ 2988 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2989 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2990 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2991 2992 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2993 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2994 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2995 for (i=0;i<n_constraints;i++) { 2996 const PetscScalar *row_cmat_values; 2997 const PetscInt *row_cmat_indices; 2998 PetscInt size_of_constraint,j; 2999 3000 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3001 for (j=0;j<size_of_constraint;j++) { 3002 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3003 } 3004 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3005 } 3006 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3007 if (F) { 3008 Mat B; 3009 3010 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3011 if (need_benign_correction) { 3012 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3013 3014 /* rhs is already zero on interior dofs, no need to change the rhs */ 3015 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3016 } 3017 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3018 if (need_benign_correction) { 3019 PetscScalar *marr; 3020 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3021 3022 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3023 if (lda_rhs != n_R) { 3024 for (i=0;i<n_constraints;i++) { 3025 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3026 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3027 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3028 } 3029 } else { 3030 for (i=0;i<n_constraints;i++) { 3031 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3032 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3033 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3034 } 3035 } 3036 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3037 } 3038 ierr = MatDestroy(&B);CHKERRQ(ierr); 3039 } else { 3040 PetscScalar *marr; 3041 3042 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3043 for (i=0;i<n_constraints;i++) { 3044 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3045 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3046 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3047 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3048 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3049 } 3050 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3051 } 3052 if (!pcbddc->switch_static) { 3053 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3054 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3055 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3056 for (i=0;i<n_constraints;i++) { 3057 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3058 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3059 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3060 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3061 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3062 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3063 } 3064 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3065 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3066 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3067 } else { 3068 if (lda_rhs != n_R) { 3069 IS dummy; 3070 3071 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3072 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3073 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3074 } else { 3075 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3076 pcbddc->local_auxmat2 = local_auxmat2_R; 3077 } 3078 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3079 } 3080 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3081 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3082 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3083 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3084 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3085 if (isCHOL) { 3086 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3087 } else { 3088 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3089 } 3090 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3091 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3092 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3093 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3094 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3095 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3096 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3097 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3098 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3099 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3100 } 3101 3102 /* Get submatrices from subdomain matrix */ 3103 if (n_vertices) { 3104 IS is_aux; 3105 3106 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3107 IS tis; 3108 3109 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3110 ierr = ISSort(tis);CHKERRQ(ierr); 3111 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3112 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3113 } else { 3114 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3115 } 3116 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3117 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3118 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3119 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3120 } 3121 3122 /* Matrix of coarse basis functions (local) */ 3123 if (pcbddc->coarse_phi_B) { 3124 PetscInt on_B,on_primal,on_D=n_D; 3125 if (pcbddc->coarse_phi_D) { 3126 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3127 } 3128 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3129 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3130 PetscScalar *marray; 3131 3132 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3133 ierr = PetscFree(marray);CHKERRQ(ierr); 3134 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3135 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3136 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3137 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3138 } 3139 } 3140 3141 if (!pcbddc->coarse_phi_B) { 3142 PetscScalar *marray; 3143 3144 n = n_B*pcbddc->local_primal_size; 3145 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3146 n += n_D*pcbddc->local_primal_size; 3147 } 3148 if (!pcbddc->symmetric_primal) { 3149 n *= 2; 3150 } 3151 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3153 n = n_B*pcbddc->local_primal_size; 3154 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3156 n += n_D*pcbddc->local_primal_size; 3157 } 3158 if (!pcbddc->symmetric_primal) { 3159 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3160 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3161 n = n_B*pcbddc->local_primal_size; 3162 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3163 } 3164 } else { 3165 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3166 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3167 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3168 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3169 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3170 } 3171 } 3172 } 3173 3174 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3175 p0_lidx_I = NULL; 3176 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3177 const PetscInt *idxs; 3178 3179 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3180 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3181 for (i=0;i<pcbddc->benign_n;i++) { 3182 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3183 } 3184 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3185 } 3186 3187 /* vertices */ 3188 if (n_vertices) { 3189 3190 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3191 3192 if (n_R) { 3193 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3194 PetscBLASInt B_N,B_one = 1; 3195 PetscScalar *x,*y; 3196 PetscBool isseqaij; 3197 3198 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3199 if (need_benign_correction) { 3200 ISLocalToGlobalMapping RtoN; 3201 IS is_p0; 3202 PetscInt *idxs_p0,n; 3203 3204 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3205 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3206 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3207 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3208 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3209 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3210 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3211 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3212 } 3213 3214 if (lda_rhs == n_R) { 3215 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3216 } else { 3217 PetscScalar *av,*array; 3218 const PetscInt *xadj,*adjncy; 3219 PetscInt n; 3220 PetscBool flg_row; 3221 3222 array = work+lda_rhs*n_vertices; 3223 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3224 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3225 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3226 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3227 for (i=0;i<n;i++) { 3228 PetscInt j; 3229 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3230 } 3231 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3232 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3233 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3234 } 3235 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3236 if (need_benign_correction) { 3237 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3238 PetscScalar *marr; 3239 3240 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3241 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3242 3243 | 0 0 0 | (V) 3244 L = | 0 0 -1 | (P-p0) 3245 | 0 0 -1 | (p0) 3246 3247 */ 3248 for (i=0;i<reuse_solver->benign_n;i++) { 3249 const PetscScalar *vals; 3250 const PetscInt *idxs,*idxs_zero; 3251 PetscInt n,j,nz; 3252 3253 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3254 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3255 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3256 for (j=0;j<n;j++) { 3257 PetscScalar val = vals[j]; 3258 PetscInt k,col = idxs[j]; 3259 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3260 } 3261 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3262 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3263 } 3264 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3265 } 3266 if (F) { 3267 /* need to correct the rhs */ 3268 if (need_benign_correction) { 3269 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3270 PetscScalar *marr; 3271 3272 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3273 if (lda_rhs != n_R) { 3274 for (i=0;i<n_vertices;i++) { 3275 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3276 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3277 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3278 } 3279 } else { 3280 for (i=0;i<n_vertices;i++) { 3281 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3282 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3283 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3284 } 3285 } 3286 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3287 } 3288 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3289 /* need to correct the solution */ 3290 if (need_benign_correction) { 3291 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3292 PetscScalar *marr; 3293 3294 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3295 if (lda_rhs != n_R) { 3296 for (i=0;i<n_vertices;i++) { 3297 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3298 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3299 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3300 } 3301 } else { 3302 for (i=0;i<n_vertices;i++) { 3303 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3304 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3305 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3306 } 3307 } 3308 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3309 } 3310 } else { 3311 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3312 for (i=0;i<n_vertices;i++) { 3313 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3314 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3315 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3316 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3317 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3318 } 3319 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3320 } 3321 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3322 /* S_VV and S_CV */ 3323 if (n_constraints) { 3324 Mat B; 3325 3326 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3327 for (i=0;i<n_vertices;i++) { 3328 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3329 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3330 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3331 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3332 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3333 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3334 } 3335 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3336 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3337 ierr = MatDestroy(&B);CHKERRQ(ierr); 3338 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3339 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3340 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3341 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3342 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3343 ierr = MatDestroy(&B);CHKERRQ(ierr); 3344 } 3345 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3346 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3347 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3348 } 3349 if (lda_rhs != n_R) { 3350 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3352 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3353 } 3354 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3355 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3356 if (need_benign_correction) { 3357 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3358 PetscScalar *marr,*sums; 3359 3360 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3361 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3362 for (i=0;i<reuse_solver->benign_n;i++) { 3363 const PetscScalar *vals; 3364 const PetscInt *idxs,*idxs_zero; 3365 PetscInt n,j,nz; 3366 3367 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3368 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3369 for (j=0;j<n_vertices;j++) { 3370 PetscInt k; 3371 sums[j] = 0.; 3372 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3373 } 3374 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3375 for (j=0;j<n;j++) { 3376 PetscScalar val = vals[j]; 3377 PetscInt k; 3378 for (k=0;k<n_vertices;k++) { 3379 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3380 } 3381 } 3382 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3383 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3384 } 3385 ierr = PetscFree(sums);CHKERRQ(ierr); 3386 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3387 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3388 } 3389 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3390 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3391 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3392 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3393 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3394 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3395 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3396 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3397 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3398 } else { 3399 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3400 } 3401 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3402 3403 /* coarse basis functions */ 3404 for (i=0;i<n_vertices;i++) { 3405 PetscScalar *y; 3406 3407 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3408 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3409 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3410 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3411 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3412 y[n_B*i+idx_V_B[i]] = 1.0; 3413 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3414 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3415 3416 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3417 PetscInt j; 3418 3419 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3420 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3421 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3422 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3423 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3424 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3425 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3426 } 3427 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3428 } 3429 /* if n_R == 0 the object is not destroyed */ 3430 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3431 } 3432 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3433 3434 if (n_constraints) { 3435 Mat B; 3436 3437 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3438 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3439 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3440 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3441 if (n_vertices) { 3442 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3443 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3444 } else { 3445 Mat S_VCt; 3446 3447 if (lda_rhs != n_R) { 3448 ierr = MatDestroy(&B);CHKERRQ(ierr); 3449 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3450 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3451 } 3452 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3453 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3454 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3455 } 3456 } 3457 ierr = MatDestroy(&B);CHKERRQ(ierr); 3458 /* coarse basis functions */ 3459 for (i=0;i<n_constraints;i++) { 3460 PetscScalar *y; 3461 3462 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3463 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3464 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3465 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3466 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3467 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3468 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3469 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3470 PetscInt j; 3471 3472 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3473 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3474 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3475 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3476 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3477 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3478 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3479 } 3480 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3481 } 3482 } 3483 if (n_constraints) { 3484 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3485 } 3486 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3487 3488 /* coarse matrix entries relative to B_0 */ 3489 if (pcbddc->benign_n) { 3490 Mat B0_B,B0_BPHI; 3491 IS is_dummy; 3492 PetscScalar *data; 3493 PetscInt j; 3494 3495 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3496 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3497 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3498 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3499 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3500 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3501 for (j=0;j<pcbddc->benign_n;j++) { 3502 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3503 for (i=0;i<pcbddc->local_primal_size;i++) { 3504 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3505 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3506 } 3507 } 3508 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3509 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3510 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3511 } 3512 3513 /* compute other basis functions for non-symmetric problems */ 3514 if (!pcbddc->symmetric_primal) { 3515 Mat B_V=NULL,B_C=NULL; 3516 PetscScalar *marray; 3517 3518 if (n_constraints) { 3519 Mat S_CCT,C_CRT; 3520 3521 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3522 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3523 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3524 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3525 if (n_vertices) { 3526 Mat S_VCT; 3527 3528 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3529 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3530 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3531 } 3532 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3533 } else { 3534 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3535 } 3536 if (n_vertices && n_R) { 3537 PetscScalar *av,*marray; 3538 const PetscInt *xadj,*adjncy; 3539 PetscInt n; 3540 PetscBool flg_row; 3541 3542 /* B_V = B_V - A_VR^T */ 3543 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3544 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3545 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3546 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3547 for (i=0;i<n;i++) { 3548 PetscInt j; 3549 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3550 } 3551 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3552 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3553 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3554 } 3555 3556 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3557 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3558 for (i=0;i<n_vertices;i++) { 3559 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3560 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3561 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3562 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3563 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3564 } 3565 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3566 if (B_C) { 3567 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3568 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3569 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3570 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3571 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3572 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3573 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3574 } 3575 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3576 } 3577 /* coarse basis functions */ 3578 for (i=0;i<pcbddc->local_primal_size;i++) { 3579 PetscScalar *y; 3580 3581 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3582 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3583 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3584 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3585 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3586 if (i<n_vertices) { 3587 y[n_B*i+idx_V_B[i]] = 1.0; 3588 } 3589 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3590 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3591 3592 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3593 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3594 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3595 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3596 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3597 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3598 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3599 } 3600 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3601 } 3602 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3603 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3604 } 3605 /* free memory */ 3606 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3607 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3608 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3609 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3610 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3611 ierr = PetscFree(work);CHKERRQ(ierr); 3612 if (n_vertices) { 3613 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3614 } 3615 if (n_constraints) { 3616 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3617 } 3618 /* Checking coarse_sub_mat and coarse basis functios */ 3619 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3620 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3621 if (pcbddc->dbg_flag) { 3622 Mat coarse_sub_mat; 3623 Mat AUXMAT,TM1,TM2,TM3,TM4; 3624 Mat coarse_phi_D,coarse_phi_B; 3625 Mat coarse_psi_D,coarse_psi_B; 3626 Mat A_II,A_BB,A_IB,A_BI; 3627 Mat C_B,CPHI; 3628 IS is_dummy; 3629 Vec mones; 3630 MatType checkmattype=MATSEQAIJ; 3631 PetscReal real_value; 3632 3633 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3634 Mat A; 3635 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3636 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3637 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3638 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3639 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3640 ierr = MatDestroy(&A);CHKERRQ(ierr); 3641 } else { 3642 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3643 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3644 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3645 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3646 } 3647 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3648 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3649 if (!pcbddc->symmetric_primal) { 3650 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3651 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3652 } 3653 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3654 3655 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3656 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3657 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3658 if (!pcbddc->symmetric_primal) { 3659 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3660 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3661 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3662 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3663 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3664 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3665 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3666 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3668 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3669 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3671 } else { 3672 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3673 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3674 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3675 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3676 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3677 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3678 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3680 } 3681 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3682 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3683 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3684 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3685 if (pcbddc->benign_n) { 3686 Mat B0_B,B0_BPHI; 3687 PetscScalar *data,*data2; 3688 PetscInt j; 3689 3690 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3691 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3692 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3693 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3694 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3695 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3696 for (j=0;j<pcbddc->benign_n;j++) { 3697 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3698 for (i=0;i<pcbddc->local_primal_size;i++) { 3699 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3700 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3701 } 3702 } 3703 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3704 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3705 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3706 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3707 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3708 } 3709 #if 0 3710 { 3711 PetscViewer viewer; 3712 char filename[256]; 3713 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3714 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3715 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3716 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3717 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3718 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3719 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3720 if (save_change) { 3721 Mat phi_B; 3722 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3723 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3724 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3725 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3726 } else { 3727 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3728 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3729 } 3730 if (pcbddc->coarse_phi_D) { 3731 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3732 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3733 } 3734 if (pcbddc->coarse_psi_B) { 3735 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3736 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3737 } 3738 if (pcbddc->coarse_psi_D) { 3739 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3740 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3741 } 3742 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3743 } 3744 #endif 3745 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3746 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3747 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3748 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3749 3750 /* check constraints */ 3751 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3752 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3753 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3754 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3755 } else { 3756 PetscScalar *data; 3757 Mat tmat; 3758 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3759 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3760 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3761 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3762 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3763 } 3764 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3765 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3766 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3767 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3768 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3769 if (!pcbddc->symmetric_primal) { 3770 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3771 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3772 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3773 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3774 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3775 } 3776 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3777 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3778 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3779 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3780 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3781 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3782 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3783 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3784 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3785 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3786 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3787 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3788 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3789 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3790 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3791 if (!pcbddc->symmetric_primal) { 3792 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3793 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3794 } 3795 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3796 } 3797 /* get back data */ 3798 *coarse_submat_vals_n = coarse_submat_vals; 3799 PetscFunctionReturn(0); 3800 } 3801 3802 #undef __FUNCT__ 3803 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3804 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3805 { 3806 Mat *work_mat; 3807 IS isrow_s,iscol_s; 3808 PetscBool rsorted,csorted; 3809 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3810 PetscErrorCode ierr; 3811 3812 PetscFunctionBegin; 3813 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3814 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3815 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3816 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3817 3818 if (!rsorted) { 3819 const PetscInt *idxs; 3820 PetscInt *idxs_sorted,i; 3821 3822 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3823 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3824 for (i=0;i<rsize;i++) { 3825 idxs_perm_r[i] = i; 3826 } 3827 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3828 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3829 for (i=0;i<rsize;i++) { 3830 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3831 } 3832 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3833 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3834 } else { 3835 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3836 isrow_s = isrow; 3837 } 3838 3839 if (!csorted) { 3840 if (isrow == iscol) { 3841 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3842 iscol_s = isrow_s; 3843 } else { 3844 const PetscInt *idxs; 3845 PetscInt *idxs_sorted,i; 3846 3847 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3848 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3849 for (i=0;i<csize;i++) { 3850 idxs_perm_c[i] = i; 3851 } 3852 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3853 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3854 for (i=0;i<csize;i++) { 3855 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3856 } 3857 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3858 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3859 } 3860 } else { 3861 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3862 iscol_s = iscol; 3863 } 3864 3865 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3866 3867 if (!rsorted || !csorted) { 3868 Mat new_mat; 3869 IS is_perm_r,is_perm_c; 3870 3871 if (!rsorted) { 3872 PetscInt *idxs_r,i; 3873 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3874 for (i=0;i<rsize;i++) { 3875 idxs_r[idxs_perm_r[i]] = i; 3876 } 3877 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3878 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3879 } else { 3880 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3881 } 3882 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3883 3884 if (!csorted) { 3885 if (isrow_s == iscol_s) { 3886 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3887 is_perm_c = is_perm_r; 3888 } else { 3889 PetscInt *idxs_c,i; 3890 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 3891 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3892 for (i=0;i<csize;i++) { 3893 idxs_c[idxs_perm_c[i]] = i; 3894 } 3895 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3896 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3897 } 3898 } else { 3899 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3900 } 3901 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3902 3903 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3904 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3905 work_mat[0] = new_mat; 3906 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3907 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3908 } 3909 3910 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3911 *B = work_mat[0]; 3912 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3913 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3914 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3915 PetscFunctionReturn(0); 3916 } 3917 3918 #undef __FUNCT__ 3919 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3920 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3921 { 3922 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3923 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3924 Mat new_mat; 3925 IS is_local,is_global; 3926 PetscInt local_size; 3927 PetscBool isseqaij; 3928 PetscErrorCode ierr; 3929 3930 PetscFunctionBegin; 3931 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3932 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3933 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3934 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3935 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3936 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3937 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3938 3939 /* check */ 3940 if (pcbddc->dbg_flag) { 3941 Vec x,x_change; 3942 PetscReal error; 3943 3944 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3945 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3946 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3947 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3948 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3949 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3950 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3951 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3952 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3953 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3954 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3955 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3956 ierr = VecDestroy(&x);CHKERRQ(ierr); 3957 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3958 } 3959 3960 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3961 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3962 if (isseqaij) { 3963 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3964 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3965 } else { 3966 Mat work_mat; 3967 3968 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3969 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3970 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3971 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3972 } 3973 if (matis->A->symmetric_set) { 3974 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3975 #if !defined(PETSC_USE_COMPLEX) 3976 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3977 #endif 3978 } 3979 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3980 PetscFunctionReturn(0); 3981 } 3982 3983 #undef __FUNCT__ 3984 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3985 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3986 { 3987 PC_IS* pcis = (PC_IS*)(pc->data); 3988 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3989 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3990 PetscInt *idx_R_local=NULL; 3991 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3992 PetscInt vbs,bs; 3993 PetscBT bitmask=NULL; 3994 PetscErrorCode ierr; 3995 3996 PetscFunctionBegin; 3997 /* 3998 No need to setup local scatters if 3999 - primal space is unchanged 4000 AND 4001 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4002 AND 4003 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4004 */ 4005 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4006 PetscFunctionReturn(0); 4007 } 4008 /* destroy old objects */ 4009 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4010 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4011 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4012 /* Set Non-overlapping dimensions */ 4013 n_B = pcis->n_B; 4014 n_D = pcis->n - n_B; 4015 n_vertices = pcbddc->n_vertices; 4016 4017 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4018 4019 /* create auxiliary bitmask and allocate workspace */ 4020 if (!sub_schurs || !sub_schurs->reuse_solver) { 4021 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4022 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4023 for (i=0;i<n_vertices;i++) { 4024 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4025 } 4026 4027 for (i=0, n_R=0; i<pcis->n; i++) { 4028 if (!PetscBTLookup(bitmask,i)) { 4029 idx_R_local[n_R++] = i; 4030 } 4031 } 4032 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4033 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4034 4035 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4036 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4037 } 4038 4039 /* Block code */ 4040 vbs = 1; 4041 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4042 if (bs>1 && !(n_vertices%bs)) { 4043 PetscBool is_blocked = PETSC_TRUE; 4044 PetscInt *vary; 4045 if (!sub_schurs || !sub_schurs->reuse_solver) { 4046 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4047 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4048 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4049 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4050 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4051 for (i=0; i<pcis->n/bs; i++) { 4052 if (vary[i]!=0 && vary[i]!=bs) { 4053 is_blocked = PETSC_FALSE; 4054 break; 4055 } 4056 } 4057 ierr = PetscFree(vary);CHKERRQ(ierr); 4058 } else { 4059 /* Verify directly the R set */ 4060 for (i=0; i<n_R/bs; i++) { 4061 PetscInt j,node=idx_R_local[bs*i]; 4062 for (j=1; j<bs; j++) { 4063 if (node != idx_R_local[bs*i+j]-j) { 4064 is_blocked = PETSC_FALSE; 4065 break; 4066 } 4067 } 4068 } 4069 } 4070 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4071 vbs = bs; 4072 for (i=0;i<n_R/vbs;i++) { 4073 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4074 } 4075 } 4076 } 4077 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4078 if (sub_schurs && sub_schurs->reuse_solver) { 4079 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4080 4081 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4082 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4083 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4084 reuse_solver->is_R = pcbddc->is_R_local; 4085 } else { 4086 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4087 } 4088 4089 /* print some info if requested */ 4090 if (pcbddc->dbg_flag) { 4091 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4093 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4095 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4096 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4098 } 4099 4100 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4101 if (!sub_schurs || !sub_schurs->reuse_solver) { 4102 IS is_aux1,is_aux2; 4103 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4104 4105 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4106 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4107 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4108 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4109 for (i=0; i<n_D; i++) { 4110 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4111 } 4112 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4113 for (i=0, j=0; i<n_R; i++) { 4114 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4115 aux_array1[j++] = i; 4116 } 4117 } 4118 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4119 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4120 for (i=0, j=0; i<n_B; i++) { 4121 if (!PetscBTLookup(bitmask,is_indices[i])) { 4122 aux_array2[j++] = i; 4123 } 4124 } 4125 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4126 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4127 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4128 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4129 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4130 4131 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4132 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4133 for (i=0, j=0; i<n_R; i++) { 4134 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4135 aux_array1[j++] = i; 4136 } 4137 } 4138 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4139 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4140 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4141 } 4142 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4143 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4144 } else { 4145 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4146 IS tis; 4147 PetscInt schur_size; 4148 4149 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4150 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4151 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4152 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4153 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4154 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4155 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4156 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4157 } 4158 } 4159 PetscFunctionReturn(0); 4160 } 4161 4162 4163 #undef __FUNCT__ 4164 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4165 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4166 { 4167 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4168 PC_IS *pcis = (PC_IS*)pc->data; 4169 PC pc_temp; 4170 Mat A_RR; 4171 MatReuse reuse; 4172 PetscScalar m_one = -1.0; 4173 PetscReal value; 4174 PetscInt n_D,n_R; 4175 PetscBool check_corr[2],issbaij; 4176 PetscErrorCode ierr; 4177 /* prefixes stuff */ 4178 char dir_prefix[256],neu_prefix[256],str_level[16]; 4179 size_t len; 4180 4181 PetscFunctionBegin; 4182 4183 /* compute prefixes */ 4184 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4185 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4186 if (!pcbddc->current_level) { 4187 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4188 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4189 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4190 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4191 } else { 4192 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4193 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4194 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4195 len -= 15; /* remove "pc_bddc_coarse_" */ 4196 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4197 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4198 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4199 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4200 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4201 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4202 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4203 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4204 } 4205 4206 /* DIRICHLET PROBLEM */ 4207 if (dirichlet) { 4208 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4209 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4210 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4211 if (pcbddc->dbg_flag) { 4212 Mat A_IIn; 4213 4214 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4215 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4216 pcis->A_II = A_IIn; 4217 } 4218 } 4219 if (pcbddc->local_mat->symmetric_set) { 4220 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4221 } 4222 /* Matrix for Dirichlet problem is pcis->A_II */ 4223 n_D = pcis->n - pcis->n_B; 4224 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4225 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4226 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4227 /* default */ 4228 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4229 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4230 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4231 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4232 if (issbaij) { 4233 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4234 } else { 4235 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4236 } 4237 /* Allow user's customization */ 4238 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4239 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4240 } 4241 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4242 if (sub_schurs && sub_schurs->reuse_solver) { 4243 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4244 4245 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4246 } 4247 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4248 if (!n_D) { 4249 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4250 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4251 } 4252 /* Set Up KSP for Dirichlet problem of BDDC */ 4253 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4254 /* set ksp_D into pcis data */ 4255 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4256 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4257 pcis->ksp_D = pcbddc->ksp_D; 4258 } 4259 4260 /* NEUMANN PROBLEM */ 4261 A_RR = 0; 4262 if (neumann) { 4263 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4264 PetscInt ibs,mbs; 4265 PetscBool issbaij; 4266 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4267 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4268 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4269 if (pcbddc->ksp_R) { /* already created ksp */ 4270 PetscInt nn_R; 4271 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4272 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4273 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4274 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4275 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4276 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4277 reuse = MAT_INITIAL_MATRIX; 4278 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4279 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4280 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4281 reuse = MAT_INITIAL_MATRIX; 4282 } else { /* safe to reuse the matrix */ 4283 reuse = MAT_REUSE_MATRIX; 4284 } 4285 } 4286 /* last check */ 4287 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4288 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4289 reuse = MAT_INITIAL_MATRIX; 4290 } 4291 } else { /* first time, so we need to create the matrix */ 4292 reuse = MAT_INITIAL_MATRIX; 4293 } 4294 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4295 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4296 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4297 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4298 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4299 if (matis->A == pcbddc->local_mat) { 4300 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4301 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4302 } else { 4303 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4304 } 4305 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4306 if (matis->A == pcbddc->local_mat) { 4307 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4308 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4309 } else { 4310 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4311 } 4312 } 4313 /* extract A_RR */ 4314 if (sub_schurs && sub_schurs->reuse_solver) { 4315 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4316 4317 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4318 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4319 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4320 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4321 } else { 4322 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4323 } 4324 } else { 4325 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4326 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4327 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4328 } 4329 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4330 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4331 } 4332 if (pcbddc->local_mat->symmetric_set) { 4333 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4334 } 4335 if (!pcbddc->ksp_R) { /* create object if not present */ 4336 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4337 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4338 /* default */ 4339 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4340 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4341 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4342 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4343 if (issbaij) { 4344 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4345 } else { 4346 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4347 } 4348 /* Allow user's customization */ 4349 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4350 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4351 } 4352 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4353 if (!n_R) { 4354 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4355 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4356 } 4357 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4358 /* Reuse solver if it is present */ 4359 if (sub_schurs && sub_schurs->reuse_solver) { 4360 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4361 4362 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4363 } 4364 /* Set Up KSP for Neumann problem of BDDC */ 4365 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4366 } 4367 4368 if (pcbddc->dbg_flag) { 4369 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4370 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4371 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4372 } 4373 4374 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4375 check_corr[0] = check_corr[1] = PETSC_FALSE; 4376 if (pcbddc->NullSpace_corr[0]) { 4377 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4378 } 4379 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4380 check_corr[0] = PETSC_TRUE; 4381 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4382 } 4383 if (neumann && pcbddc->NullSpace_corr[2]) { 4384 check_corr[1] = PETSC_TRUE; 4385 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4386 } 4387 4388 /* check Dirichlet and Neumann solvers */ 4389 if (pcbddc->dbg_flag) { 4390 if (dirichlet) { /* Dirichlet */ 4391 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4392 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4393 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4394 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4395 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4396 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4397 if (check_corr[0]) { 4398 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4399 } 4400 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4401 } 4402 if (neumann) { /* Neumann */ 4403 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4404 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4405 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4406 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4407 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4408 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4409 if (check_corr[1]) { 4410 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4411 } 4412 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4413 } 4414 } 4415 /* free Neumann problem's matrix */ 4416 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4417 PetscFunctionReturn(0); 4418 } 4419 4420 #undef __FUNCT__ 4421 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4422 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4423 { 4424 PetscErrorCode ierr; 4425 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4426 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4427 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4428 4429 PetscFunctionBegin; 4430 if (!reuse_solver) { 4431 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4432 } 4433 if (!pcbddc->switch_static) { 4434 if (applytranspose && pcbddc->local_auxmat1) { 4435 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4436 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4437 } 4438 if (!reuse_solver) { 4439 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4440 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4441 } else { 4442 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4443 4444 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4445 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4446 } 4447 } else { 4448 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4449 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4450 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4451 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4452 if (applytranspose && pcbddc->local_auxmat1) { 4453 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4454 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4455 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4456 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4457 } 4458 } 4459 if (!reuse_solver || pcbddc->switch_static) { 4460 if (applytranspose) { 4461 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4462 } else { 4463 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4464 } 4465 } else { 4466 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4467 4468 if (applytranspose) { 4469 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4470 } else { 4471 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4472 } 4473 } 4474 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4475 if (!pcbddc->switch_static) { 4476 if (!reuse_solver) { 4477 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4478 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4479 } else { 4480 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4481 4482 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4483 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4484 } 4485 if (!applytranspose && pcbddc->local_auxmat1) { 4486 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4487 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4488 } 4489 } else { 4490 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4491 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4492 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4493 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4494 if (!applytranspose && pcbddc->local_auxmat1) { 4495 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4496 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4497 } 4498 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4499 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4500 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4501 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4502 } 4503 PetscFunctionReturn(0); 4504 } 4505 4506 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4507 #undef __FUNCT__ 4508 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4509 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4510 { 4511 PetscErrorCode ierr; 4512 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4513 PC_IS* pcis = (PC_IS*) (pc->data); 4514 const PetscScalar zero = 0.0; 4515 4516 PetscFunctionBegin; 4517 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4518 if (!pcbddc->benign_apply_coarse_only) { 4519 if (applytranspose) { 4520 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4521 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4522 } else { 4523 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4524 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4525 } 4526 } else { 4527 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4528 } 4529 4530 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4531 if (pcbddc->benign_n) { 4532 PetscScalar *array; 4533 PetscInt j; 4534 4535 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4536 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4537 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4538 } 4539 4540 /* start communications from local primal nodes to rhs of coarse solver */ 4541 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4542 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4543 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4544 4545 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4546 if (pcbddc->coarse_ksp) { 4547 Mat coarse_mat; 4548 Vec rhs,sol; 4549 MatNullSpace nullsp; 4550 PetscBool isbddc = PETSC_FALSE; 4551 4552 if (pcbddc->benign_have_null) { 4553 PC coarse_pc; 4554 4555 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4556 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4557 /* we need to propagate to coarser levels the need for a possible benign correction */ 4558 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4559 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4560 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4561 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4562 } 4563 } 4564 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4565 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4566 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4567 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4568 if (nullsp) { 4569 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4570 } 4571 if (applytranspose) { 4572 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4573 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4574 } else { 4575 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4576 PC coarse_pc; 4577 4578 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4579 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4580 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4581 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4582 } else { 4583 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4584 } 4585 } 4586 /* we don't need the benign correction at coarser levels anymore */ 4587 if (pcbddc->benign_have_null && isbddc) { 4588 PC coarse_pc; 4589 PC_BDDC* coarsepcbddc; 4590 4591 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4592 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4593 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4594 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4595 } 4596 if (nullsp) { 4597 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4598 } 4599 } 4600 4601 /* Local solution on R nodes */ 4602 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4603 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4604 } 4605 /* communications from coarse sol to local primal nodes */ 4606 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4607 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4608 4609 /* Sum contributions from the two levels */ 4610 if (!pcbddc->benign_apply_coarse_only) { 4611 if (applytranspose) { 4612 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4613 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4614 } else { 4615 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4616 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4617 } 4618 /* store p0 */ 4619 if (pcbddc->benign_n) { 4620 PetscScalar *array; 4621 PetscInt j; 4622 4623 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4624 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4625 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4626 } 4627 } else { /* expand the coarse solution */ 4628 if (applytranspose) { 4629 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4630 } else { 4631 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4632 } 4633 } 4634 PetscFunctionReturn(0); 4635 } 4636 4637 #undef __FUNCT__ 4638 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4639 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4640 { 4641 PetscErrorCode ierr; 4642 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4643 PetscScalar *array; 4644 Vec from,to; 4645 4646 PetscFunctionBegin; 4647 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4648 from = pcbddc->coarse_vec; 4649 to = pcbddc->vec1_P; 4650 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4651 Vec tvec; 4652 4653 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4654 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4655 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4656 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4657 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4658 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4659 } 4660 } else { /* from local to global -> put data in coarse right hand side */ 4661 from = pcbddc->vec1_P; 4662 to = pcbddc->coarse_vec; 4663 } 4664 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4665 PetscFunctionReturn(0); 4666 } 4667 4668 #undef __FUNCT__ 4669 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4670 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4671 { 4672 PetscErrorCode ierr; 4673 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4674 PetscScalar *array; 4675 Vec from,to; 4676 4677 PetscFunctionBegin; 4678 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4679 from = pcbddc->coarse_vec; 4680 to = pcbddc->vec1_P; 4681 } else { /* from local to global -> put data in coarse right hand side */ 4682 from = pcbddc->vec1_P; 4683 to = pcbddc->coarse_vec; 4684 } 4685 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4686 if (smode == SCATTER_FORWARD) { 4687 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4688 Vec tvec; 4689 4690 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4691 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4692 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4693 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4694 } 4695 } else { 4696 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4697 ierr = VecResetArray(from);CHKERRQ(ierr); 4698 } 4699 } 4700 PetscFunctionReturn(0); 4701 } 4702 4703 /* uncomment for testing purposes */ 4704 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4705 #undef __FUNCT__ 4706 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4707 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4708 { 4709 PetscErrorCode ierr; 4710 PC_IS* pcis = (PC_IS*)(pc->data); 4711 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4712 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4713 /* one and zero */ 4714 PetscScalar one=1.0,zero=0.0; 4715 /* space to store constraints and their local indices */ 4716 PetscScalar *constraints_data; 4717 PetscInt *constraints_idxs,*constraints_idxs_B; 4718 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4719 PetscInt *constraints_n; 4720 /* iterators */ 4721 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4722 /* BLAS integers */ 4723 PetscBLASInt lwork,lierr; 4724 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4725 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4726 /* reuse */ 4727 PetscInt olocal_primal_size,olocal_primal_size_cc; 4728 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4729 /* change of basis */ 4730 PetscBool qr_needed; 4731 PetscBT change_basis,qr_needed_idx; 4732 /* auxiliary stuff */ 4733 PetscInt *nnz,*is_indices; 4734 PetscInt ncc; 4735 /* some quantities */ 4736 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4737 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4738 4739 PetscFunctionBegin; 4740 /* Destroy Mat objects computed previously */ 4741 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4742 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4743 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4744 /* save info on constraints from previous setup (if any) */ 4745 olocal_primal_size = pcbddc->local_primal_size; 4746 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4747 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4748 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4749 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4750 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4751 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4752 4753 if (!pcbddc->adaptive_selection) { 4754 IS ISForVertices,*ISForFaces,*ISForEdges; 4755 MatNullSpace nearnullsp; 4756 const Vec *nearnullvecs; 4757 Vec *localnearnullsp; 4758 PetscScalar *array; 4759 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4760 PetscBool nnsp_has_cnst; 4761 /* LAPACK working arrays for SVD or POD */ 4762 PetscBool skip_lapack,boolforchange; 4763 PetscScalar *work; 4764 PetscReal *singular_vals; 4765 #if defined(PETSC_USE_COMPLEX) 4766 PetscReal *rwork; 4767 #endif 4768 #if defined(PETSC_MISSING_LAPACK_GESVD) 4769 PetscScalar *temp_basis,*correlation_mat; 4770 #else 4771 PetscBLASInt dummy_int=1; 4772 PetscScalar dummy_scalar=1.; 4773 #endif 4774 4775 /* Get index sets for faces, edges and vertices from graph */ 4776 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4777 /* print some info */ 4778 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4779 PetscInt nv; 4780 4781 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4782 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4783 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4785 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4786 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4787 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4788 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4789 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4790 } 4791 4792 /* free unneeded index sets */ 4793 if (!pcbddc->use_vertices) { 4794 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4795 } 4796 if (!pcbddc->use_edges) { 4797 for (i=0;i<n_ISForEdges;i++) { 4798 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4799 } 4800 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4801 n_ISForEdges = 0; 4802 } 4803 if (!pcbddc->use_faces) { 4804 for (i=0;i<n_ISForFaces;i++) { 4805 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4806 } 4807 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4808 n_ISForFaces = 0; 4809 } 4810 4811 /* check if near null space is attached to global mat */ 4812 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4813 if (nearnullsp) { 4814 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4815 /* remove any stored info */ 4816 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4817 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4818 /* store information for BDDC solver reuse */ 4819 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4820 pcbddc->onearnullspace = nearnullsp; 4821 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4822 for (i=0;i<nnsp_size;i++) { 4823 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4824 } 4825 } else { /* if near null space is not provided BDDC uses constants by default */ 4826 nnsp_size = 0; 4827 nnsp_has_cnst = PETSC_TRUE; 4828 } 4829 /* get max number of constraints on a single cc */ 4830 max_constraints = nnsp_size; 4831 if (nnsp_has_cnst) max_constraints++; 4832 4833 /* 4834 Evaluate maximum storage size needed by the procedure 4835 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4836 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4837 There can be multiple constraints per connected component 4838 */ 4839 n_vertices = 0; 4840 if (ISForVertices) { 4841 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4842 } 4843 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4844 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4845 4846 total_counts = n_ISForFaces+n_ISForEdges; 4847 total_counts *= max_constraints; 4848 total_counts += n_vertices; 4849 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4850 4851 total_counts = 0; 4852 max_size_of_constraint = 0; 4853 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4854 IS used_is; 4855 if (i<n_ISForEdges) { 4856 used_is = ISForEdges[i]; 4857 } else { 4858 used_is = ISForFaces[i-n_ISForEdges]; 4859 } 4860 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4861 total_counts += j; 4862 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4863 } 4864 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4865 4866 /* get local part of global near null space vectors */ 4867 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4868 for (k=0;k<nnsp_size;k++) { 4869 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4870 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4871 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4872 } 4873 4874 /* whether or not to skip lapack calls */ 4875 skip_lapack = PETSC_TRUE; 4876 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4877 4878 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4879 if (!skip_lapack) { 4880 PetscScalar temp_work; 4881 4882 #if defined(PETSC_MISSING_LAPACK_GESVD) 4883 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4884 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4885 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4886 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4887 #if defined(PETSC_USE_COMPLEX) 4888 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4889 #endif 4890 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4891 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4892 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4893 lwork = -1; 4894 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4895 #if !defined(PETSC_USE_COMPLEX) 4896 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4897 #else 4898 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4899 #endif 4900 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4901 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4902 #else /* on missing GESVD */ 4903 /* SVD */ 4904 PetscInt max_n,min_n; 4905 max_n = max_size_of_constraint; 4906 min_n = max_constraints; 4907 if (max_size_of_constraint < max_constraints) { 4908 min_n = max_size_of_constraint; 4909 max_n = max_constraints; 4910 } 4911 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4912 #if defined(PETSC_USE_COMPLEX) 4913 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4914 #endif 4915 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4916 lwork = -1; 4917 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4918 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4919 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4920 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4921 #if !defined(PETSC_USE_COMPLEX) 4922 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4923 #else 4924 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4925 #endif 4926 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4927 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4928 #endif /* on missing GESVD */ 4929 /* Allocate optimal workspace */ 4930 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4931 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4932 } 4933 /* Now we can loop on constraining sets */ 4934 total_counts = 0; 4935 constraints_idxs_ptr[0] = 0; 4936 constraints_data_ptr[0] = 0; 4937 /* vertices */ 4938 if (n_vertices) { 4939 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4940 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4941 for (i=0;i<n_vertices;i++) { 4942 constraints_n[total_counts] = 1; 4943 constraints_data[total_counts] = 1.0; 4944 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4945 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4946 total_counts++; 4947 } 4948 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4949 n_vertices = total_counts; 4950 } 4951 4952 /* edges and faces */ 4953 total_counts_cc = total_counts; 4954 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4955 IS used_is; 4956 PetscBool idxs_copied = PETSC_FALSE; 4957 4958 if (ncc<n_ISForEdges) { 4959 used_is = ISForEdges[ncc]; 4960 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4961 } else { 4962 used_is = ISForFaces[ncc-n_ISForEdges]; 4963 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4964 } 4965 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4966 4967 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4968 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4969 /* change of basis should not be performed on local periodic nodes */ 4970 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4971 if (nnsp_has_cnst) { 4972 PetscScalar quad_value; 4973 4974 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4975 idxs_copied = PETSC_TRUE; 4976 4977 if (!pcbddc->use_nnsp_true) { 4978 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4979 } else { 4980 quad_value = 1.0; 4981 } 4982 for (j=0;j<size_of_constraint;j++) { 4983 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4984 } 4985 temp_constraints++; 4986 total_counts++; 4987 } 4988 for (k=0;k<nnsp_size;k++) { 4989 PetscReal real_value; 4990 PetscScalar *ptr_to_data; 4991 4992 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4993 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4994 for (j=0;j<size_of_constraint;j++) { 4995 ptr_to_data[j] = array[is_indices[j]]; 4996 } 4997 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4998 /* check if array is null on the connected component */ 4999 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5000 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5001 if (real_value > 0.0) { /* keep indices and values */ 5002 temp_constraints++; 5003 total_counts++; 5004 if (!idxs_copied) { 5005 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5006 idxs_copied = PETSC_TRUE; 5007 } 5008 } 5009 } 5010 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5011 valid_constraints = temp_constraints; 5012 if (!pcbddc->use_nnsp_true && temp_constraints) { 5013 if (temp_constraints == 1) { /* just normalize the constraint */ 5014 PetscScalar norm,*ptr_to_data; 5015 5016 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5018 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5019 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5020 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5021 } else { /* perform SVD */ 5022 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5023 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5024 5025 #if defined(PETSC_MISSING_LAPACK_GESVD) 5026 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5027 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5028 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5029 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5030 from that computed using LAPACKgesvd 5031 -> This is due to a different computation of eigenvectors in LAPACKheev 5032 -> The quality of the POD-computed basis will be the same */ 5033 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5034 /* Store upper triangular part of correlation matrix */ 5035 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5036 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5037 for (j=0;j<temp_constraints;j++) { 5038 for (k=0;k<j+1;k++) { 5039 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5040 } 5041 } 5042 /* compute eigenvalues and eigenvectors of correlation matrix */ 5043 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5044 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5045 #if !defined(PETSC_USE_COMPLEX) 5046 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5047 #else 5048 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5049 #endif 5050 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5051 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5052 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5053 j = 0; 5054 while (j < temp_constraints && singular_vals[j] < tol) j++; 5055 total_counts = total_counts-j; 5056 valid_constraints = temp_constraints-j; 5057 /* scale and copy POD basis into used quadrature memory */ 5058 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5059 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5060 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5061 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5062 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5063 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5064 if (j<temp_constraints) { 5065 PetscInt ii; 5066 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5067 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5068 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5069 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5070 for (k=0;k<temp_constraints-j;k++) { 5071 for (ii=0;ii<size_of_constraint;ii++) { 5072 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5073 } 5074 } 5075 } 5076 #else /* on missing GESVD */ 5077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5078 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5080 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5081 #if !defined(PETSC_USE_COMPLEX) 5082 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5083 #else 5084 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5085 #endif 5086 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5087 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5088 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5089 k = temp_constraints; 5090 if (k > size_of_constraint) k = size_of_constraint; 5091 j = 0; 5092 while (j < k && singular_vals[k-j-1] < tol) j++; 5093 valid_constraints = k-j; 5094 total_counts = total_counts-temp_constraints+valid_constraints; 5095 #endif /* on missing GESVD */ 5096 } 5097 } 5098 /* update pointers information */ 5099 if (valid_constraints) { 5100 constraints_n[total_counts_cc] = valid_constraints; 5101 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5102 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5103 /* set change_of_basis flag */ 5104 if (boolforchange) { 5105 PetscBTSet(change_basis,total_counts_cc); 5106 } 5107 total_counts_cc++; 5108 } 5109 } 5110 /* free workspace */ 5111 if (!skip_lapack) { 5112 ierr = PetscFree(work);CHKERRQ(ierr); 5113 #if defined(PETSC_USE_COMPLEX) 5114 ierr = PetscFree(rwork);CHKERRQ(ierr); 5115 #endif 5116 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5117 #if defined(PETSC_MISSING_LAPACK_GESVD) 5118 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5119 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5120 #endif 5121 } 5122 for (k=0;k<nnsp_size;k++) { 5123 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5124 } 5125 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5126 /* free index sets of faces, edges and vertices */ 5127 for (i=0;i<n_ISForFaces;i++) { 5128 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5129 } 5130 if (n_ISForFaces) { 5131 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5132 } 5133 for (i=0;i<n_ISForEdges;i++) { 5134 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5135 } 5136 if (n_ISForEdges) { 5137 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5138 } 5139 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5140 } else { 5141 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5142 5143 total_counts = 0; 5144 n_vertices = 0; 5145 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5146 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5147 } 5148 max_constraints = 0; 5149 total_counts_cc = 0; 5150 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5151 total_counts += pcbddc->adaptive_constraints_n[i]; 5152 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5153 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5154 } 5155 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5156 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5157 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5158 constraints_data = pcbddc->adaptive_constraints_data; 5159 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5160 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5161 total_counts_cc = 0; 5162 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5163 if (pcbddc->adaptive_constraints_n[i]) { 5164 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5165 } 5166 } 5167 #if 0 5168 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5169 for (i=0;i<total_counts_cc;i++) { 5170 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5171 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5172 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5173 printf(" %d",constraints_idxs[j]); 5174 } 5175 printf("\n"); 5176 printf("number of cc: %d\n",constraints_n[i]); 5177 } 5178 for (i=0;i<n_vertices;i++) { 5179 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5180 } 5181 for (i=0;i<sub_schurs->n_subs;i++) { 5182 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5183 } 5184 #endif 5185 5186 max_size_of_constraint = 0; 5187 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5188 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5189 /* Change of basis */ 5190 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5191 if (pcbddc->use_change_of_basis) { 5192 for (i=0;i<sub_schurs->n_subs;i++) { 5193 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5194 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5195 } 5196 } 5197 } 5198 } 5199 pcbddc->local_primal_size = total_counts; 5200 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5201 5202 /* map constraints_idxs in boundary numbering */ 5203 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5204 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5205 5206 /* Create constraint matrix */ 5207 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5208 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5209 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5210 5211 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5212 /* determine if a QR strategy is needed for change of basis */ 5213 qr_needed = PETSC_FALSE; 5214 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5215 total_primal_vertices=0; 5216 pcbddc->local_primal_size_cc = 0; 5217 for (i=0;i<total_counts_cc;i++) { 5218 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5219 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5220 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5221 pcbddc->local_primal_size_cc += 1; 5222 } else if (PetscBTLookup(change_basis,i)) { 5223 for (k=0;k<constraints_n[i];k++) { 5224 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5225 } 5226 pcbddc->local_primal_size_cc += constraints_n[i]; 5227 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5228 PetscBTSet(qr_needed_idx,i); 5229 qr_needed = PETSC_TRUE; 5230 } 5231 } else { 5232 pcbddc->local_primal_size_cc += 1; 5233 } 5234 } 5235 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5236 pcbddc->n_vertices = total_primal_vertices; 5237 /* permute indices in order to have a sorted set of vertices */ 5238 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5239 5240 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5241 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5242 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5243 5244 /* nonzero structure of constraint matrix */ 5245 /* and get reference dof for local constraints */ 5246 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5247 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5248 5249 j = total_primal_vertices; 5250 total_counts = total_primal_vertices; 5251 cum = total_primal_vertices; 5252 for (i=n_vertices;i<total_counts_cc;i++) { 5253 if (!PetscBTLookup(change_basis,i)) { 5254 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5255 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5256 cum++; 5257 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5258 for (k=0;k<constraints_n[i];k++) { 5259 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5260 nnz[j+k] = size_of_constraint; 5261 } 5262 j += constraints_n[i]; 5263 } 5264 } 5265 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5266 ierr = PetscFree(nnz);CHKERRQ(ierr); 5267 5268 /* set values in constraint matrix */ 5269 for (i=0;i<total_primal_vertices;i++) { 5270 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5271 } 5272 total_counts = total_primal_vertices; 5273 for (i=n_vertices;i<total_counts_cc;i++) { 5274 if (!PetscBTLookup(change_basis,i)) { 5275 PetscInt *cols; 5276 5277 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5278 cols = constraints_idxs+constraints_idxs_ptr[i]; 5279 for (k=0;k<constraints_n[i];k++) { 5280 PetscInt row = total_counts+k; 5281 PetscScalar *vals; 5282 5283 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5284 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5285 } 5286 total_counts += constraints_n[i]; 5287 } 5288 } 5289 /* assembling */ 5290 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5291 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5292 5293 /* 5294 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5295 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5296 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5297 */ 5298 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5299 if (pcbddc->use_change_of_basis) { 5300 /* dual and primal dofs on a single cc */ 5301 PetscInt dual_dofs,primal_dofs; 5302 /* working stuff for GEQRF */ 5303 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5304 PetscBLASInt lqr_work; 5305 /* working stuff for UNGQR */ 5306 PetscScalar *gqr_work,lgqr_work_t; 5307 PetscBLASInt lgqr_work; 5308 /* working stuff for TRTRS */ 5309 PetscScalar *trs_rhs; 5310 PetscBLASInt Blas_NRHS; 5311 /* pointers for values insertion into change of basis matrix */ 5312 PetscInt *start_rows,*start_cols; 5313 PetscScalar *start_vals; 5314 /* working stuff for values insertion */ 5315 PetscBT is_primal; 5316 PetscInt *aux_primal_numbering_B; 5317 /* matrix sizes */ 5318 PetscInt global_size,local_size; 5319 /* temporary change of basis */ 5320 Mat localChangeOfBasisMatrix; 5321 /* extra space for debugging */ 5322 PetscScalar *dbg_work; 5323 5324 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5325 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5326 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5327 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5328 /* nonzeros for local mat */ 5329 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5330 if (!pcbddc->benign_change || pcbddc->fake_change) { 5331 for (i=0;i<pcis->n;i++) nnz[i]=1; 5332 } else { 5333 const PetscInt *ii; 5334 PetscInt n; 5335 PetscBool flg_row; 5336 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5337 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5338 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5339 } 5340 for (i=n_vertices;i<total_counts_cc;i++) { 5341 if (PetscBTLookup(change_basis,i)) { 5342 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5343 if (PetscBTLookup(qr_needed_idx,i)) { 5344 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5345 } else { 5346 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5347 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5348 } 5349 } 5350 } 5351 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5352 ierr = PetscFree(nnz);CHKERRQ(ierr); 5353 /* Set interior change in the matrix */ 5354 if (!pcbddc->benign_change || pcbddc->fake_change) { 5355 for (i=0;i<pcis->n;i++) { 5356 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5357 } 5358 } else { 5359 const PetscInt *ii,*jj; 5360 PetscScalar *aa; 5361 PetscInt n; 5362 PetscBool flg_row; 5363 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5364 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5365 for (i=0;i<n;i++) { 5366 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5367 } 5368 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5369 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5370 } 5371 5372 if (pcbddc->dbg_flag) { 5373 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5374 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5375 } 5376 5377 5378 /* Now we loop on the constraints which need a change of basis */ 5379 /* 5380 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5381 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5382 5383 Basic blocks of change of basis matrix T computed by 5384 5385 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5386 5387 | 1 0 ... 0 s_1/S | 5388 | 0 1 ... 0 s_2/S | 5389 | ... | 5390 | 0 ... 1 s_{n-1}/S | 5391 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5392 5393 with S = \sum_{i=1}^n s_i^2 5394 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5395 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5396 5397 - QR decomposition of constraints otherwise 5398 */ 5399 if (qr_needed) { 5400 /* space to store Q */ 5401 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5402 /* array to store scaling factors for reflectors */ 5403 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5404 /* first we issue queries for optimal work */ 5405 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5406 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5407 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5408 lqr_work = -1; 5409 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5410 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5411 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5412 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5413 lgqr_work = -1; 5414 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5415 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5416 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5417 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5418 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5419 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5420 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5421 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5422 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5423 /* array to store rhs and solution of triangular solver */ 5424 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5425 /* allocating workspace for check */ 5426 if (pcbddc->dbg_flag) { 5427 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5428 } 5429 } 5430 /* array to store whether a node is primal or not */ 5431 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5432 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5433 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5434 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5435 for (i=0;i<total_primal_vertices;i++) { 5436 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5437 } 5438 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5439 5440 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5441 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5442 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5443 if (PetscBTLookup(change_basis,total_counts)) { 5444 /* get constraint info */ 5445 primal_dofs = constraints_n[total_counts]; 5446 dual_dofs = size_of_constraint-primal_dofs; 5447 5448 if (pcbddc->dbg_flag) { 5449 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5450 } 5451 5452 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5453 5454 /* copy quadrature constraints for change of basis check */ 5455 if (pcbddc->dbg_flag) { 5456 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5457 } 5458 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5459 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5460 5461 /* compute QR decomposition of constraints */ 5462 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5463 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5464 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5465 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5466 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5467 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5468 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5469 5470 /* explictly compute R^-T */ 5471 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5472 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5473 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5474 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5475 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5476 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5477 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5478 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5479 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5480 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5481 5482 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5483 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5484 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5485 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5487 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5488 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5489 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5490 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5491 5492 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5493 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5494 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5495 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5496 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5497 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5499 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5502 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5503 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5504 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5505 5506 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5507 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5508 /* insert cols for primal dofs */ 5509 for (j=0;j<primal_dofs;j++) { 5510 start_vals = &qr_basis[j*size_of_constraint]; 5511 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5512 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5513 } 5514 /* insert cols for dual dofs */ 5515 for (j=0,k=0;j<dual_dofs;k++) { 5516 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5517 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5518 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5519 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5520 j++; 5521 } 5522 } 5523 5524 /* check change of basis */ 5525 if (pcbddc->dbg_flag) { 5526 PetscInt ii,jj; 5527 PetscBool valid_qr=PETSC_TRUE; 5528 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5529 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5531 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5533 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5534 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5535 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5536 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5537 for (jj=0;jj<size_of_constraint;jj++) { 5538 for (ii=0;ii<primal_dofs;ii++) { 5539 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5540 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5541 } 5542 } 5543 if (!valid_qr) { 5544 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5545 for (jj=0;jj<size_of_constraint;jj++) { 5546 for (ii=0;ii<primal_dofs;ii++) { 5547 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5548 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5549 } 5550 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5551 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5552 } 5553 } 5554 } 5555 } else { 5556 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5557 } 5558 } 5559 } else { /* simple transformation block */ 5560 PetscInt row,col; 5561 PetscScalar val,norm; 5562 5563 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5564 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5565 for (j=0;j<size_of_constraint;j++) { 5566 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5567 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5568 if (!PetscBTLookup(is_primal,row_B)) { 5569 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5570 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5571 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5572 } else { 5573 for (k=0;k<size_of_constraint;k++) { 5574 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5575 if (row != col) { 5576 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5577 } else { 5578 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5579 } 5580 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5581 } 5582 } 5583 } 5584 if (pcbddc->dbg_flag) { 5585 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5586 } 5587 } 5588 } else { 5589 if (pcbddc->dbg_flag) { 5590 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5591 } 5592 } 5593 } 5594 5595 /* free workspace */ 5596 if (qr_needed) { 5597 if (pcbddc->dbg_flag) { 5598 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5599 } 5600 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5601 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5602 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5603 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5604 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5605 } 5606 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5607 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5608 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5609 5610 /* assembling of global change of variable */ 5611 if (!pcbddc->fake_change) { 5612 Mat tmat; 5613 PetscInt bs; 5614 5615 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5616 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5617 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5618 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5619 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5620 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5621 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5622 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5623 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5624 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5625 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5626 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5627 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5628 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5629 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5630 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5631 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5632 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5633 5634 /* check */ 5635 if (pcbddc->dbg_flag) { 5636 PetscReal error; 5637 Vec x,x_change; 5638 5639 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5640 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5641 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5642 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5643 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5644 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5645 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5646 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5647 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5648 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5649 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5650 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5651 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5652 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5653 ierr = VecDestroy(&x);CHKERRQ(ierr); 5654 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5655 } 5656 /* adapt sub_schurs computed (if any) */ 5657 if (pcbddc->use_deluxe_scaling) { 5658 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5659 5660 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5661 if (sub_schurs && sub_schurs->S_Ej_all) { 5662 Mat S_new,tmat; 5663 IS is_all_N,is_V_Sall = NULL; 5664 5665 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5666 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5667 if (pcbddc->deluxe_zerorows) { 5668 ISLocalToGlobalMapping NtoSall; 5669 IS is_V; 5670 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5671 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5672 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5673 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5674 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5675 } 5676 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5677 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5678 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5679 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5680 if (pcbddc->deluxe_zerorows) { 5681 const PetscScalar *array; 5682 const PetscInt *idxs_V,*idxs_all; 5683 PetscInt i,n_V; 5684 5685 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5686 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5687 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5688 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5689 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5690 for (i=0;i<n_V;i++) { 5691 PetscScalar val; 5692 PetscInt idx; 5693 5694 idx = idxs_V[i]; 5695 val = array[idxs_all[idxs_V[i]]]; 5696 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5697 } 5698 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5701 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5702 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5703 } 5704 sub_schurs->S_Ej_all = S_new; 5705 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5706 if (sub_schurs->sum_S_Ej_all) { 5707 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5708 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5709 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5710 if (pcbddc->deluxe_zerorows) { 5711 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5712 } 5713 sub_schurs->sum_S_Ej_all = S_new; 5714 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5715 } 5716 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5717 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5718 } 5719 /* destroy any change of basis context in sub_schurs */ 5720 if (sub_schurs && sub_schurs->change) { 5721 PetscInt i; 5722 5723 for (i=0;i<sub_schurs->n_subs;i++) { 5724 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5725 } 5726 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5727 } 5728 } 5729 if (pcbddc->switch_static) { /* need to save the local change */ 5730 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5731 } else { 5732 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5733 } 5734 /* determine if any process has changed the pressures locally */ 5735 pcbddc->change_interior = pcbddc->benign_have_null; 5736 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5737 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5738 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5739 pcbddc->use_qr_single = qr_needed; 5740 } 5741 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5742 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5743 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5744 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5745 } else { 5746 Mat benign_global = NULL; 5747 if (pcbddc->benign_have_null) { 5748 Mat tmat; 5749 5750 pcbddc->change_interior = PETSC_TRUE; 5751 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5752 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5753 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5754 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5755 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5756 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5757 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5758 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5759 if (pcbddc->benign_change) { 5760 Mat M; 5761 5762 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5763 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5764 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5765 ierr = MatDestroy(&M);CHKERRQ(ierr); 5766 } else { 5767 Mat eye; 5768 PetscScalar *array; 5769 5770 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5771 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5772 for (i=0;i<pcis->n;i++) { 5773 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5774 } 5775 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5776 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5777 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5778 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5779 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5780 } 5781 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5782 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5783 } 5784 if (pcbddc->user_ChangeOfBasisMatrix) { 5785 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5786 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5787 } else if (pcbddc->benign_have_null) { 5788 pcbddc->ChangeOfBasisMatrix = benign_global; 5789 } 5790 } 5791 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5792 IS is_global; 5793 const PetscInt *gidxs; 5794 5795 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5796 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5797 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5798 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5799 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5800 } 5801 } 5802 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5803 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5804 } 5805 5806 if (!pcbddc->fake_change) { 5807 /* add pressure dofs to set of primal nodes for numbering purposes */ 5808 for (i=0;i<pcbddc->benign_n;i++) { 5809 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5810 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5811 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5812 pcbddc->local_primal_size_cc++; 5813 pcbddc->local_primal_size++; 5814 } 5815 5816 /* check if a new primal space has been introduced (also take into account benign trick) */ 5817 pcbddc->new_primal_space_local = PETSC_TRUE; 5818 if (olocal_primal_size == pcbddc->local_primal_size) { 5819 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5820 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5821 if (!pcbddc->new_primal_space_local) { 5822 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5823 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5824 } 5825 } 5826 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5827 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5828 } 5829 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5830 5831 /* flush dbg viewer */ 5832 if (pcbddc->dbg_flag) { 5833 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5834 } 5835 5836 /* free workspace */ 5837 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5838 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5839 if (!pcbddc->adaptive_selection) { 5840 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5841 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5842 } else { 5843 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5844 pcbddc->adaptive_constraints_idxs_ptr, 5845 pcbddc->adaptive_constraints_data_ptr, 5846 pcbddc->adaptive_constraints_idxs, 5847 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5848 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5849 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5850 } 5851 PetscFunctionReturn(0); 5852 } 5853 5854 #undef __FUNCT__ 5855 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5856 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5857 { 5858 ISLocalToGlobalMapping map; 5859 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5860 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5861 PetscInt ierr,i,N; 5862 5863 PetscFunctionBegin; 5864 if (pcbddc->recompute_topography) { 5865 /* Reset previously computed graph */ 5866 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5867 /* Init local Graph struct */ 5868 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5869 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5870 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5871 5872 /* Check validity of the csr graph passed in by the user */ 5873 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5874 5875 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5876 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5877 PetscInt *xadj,*adjncy; 5878 PetscInt nvtxs; 5879 PetscBool flg_row=PETSC_FALSE; 5880 5881 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5882 if (flg_row) { 5883 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5884 pcbddc->computed_rowadj = PETSC_TRUE; 5885 } 5886 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5887 } 5888 if (pcbddc->dbg_flag) { 5889 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5890 } 5891 5892 /* Setup of Graph */ 5893 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5894 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5895 5896 /* attach info on disconnected subdomains if present */ 5897 if (pcbddc->n_local_subs) { 5898 PetscInt *local_subs; 5899 5900 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5901 for (i=0;i<pcbddc->n_local_subs;i++) { 5902 const PetscInt *idxs; 5903 PetscInt nl,j; 5904 5905 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5906 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5907 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5908 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5909 } 5910 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5911 pcbddc->mat_graph->local_subs = local_subs; 5912 } 5913 pcbddc->graphanalyzed = PETSC_FALSE; 5914 } 5915 5916 if (pcbddc->graphanalyzed) { 5917 /* Graph's connected components analysis */ 5918 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5919 pcbddc->graphanalyzed = PETSC_TRUE; 5920 } 5921 PetscFunctionReturn(0); 5922 } 5923 5924 #undef __FUNCT__ 5925 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5926 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5927 { 5928 PetscInt i,j; 5929 PetscScalar *alphas; 5930 PetscErrorCode ierr; 5931 5932 PetscFunctionBegin; 5933 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5934 for (i=0;i<n;i++) { 5935 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5936 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5937 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5938 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5939 } 5940 ierr = PetscFree(alphas);CHKERRQ(ierr); 5941 PetscFunctionReturn(0); 5942 } 5943 5944 #undef __FUNCT__ 5945 #define __FUNCT__ "MatISGetSubassemblingPattern" 5946 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5947 { 5948 Mat A; 5949 PetscInt n_neighs,*neighs,*n_shared,**shared; 5950 PetscMPIInt size,rank,color; 5951 PetscInt *xadj,*adjncy; 5952 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5953 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5954 PetscInt void_procs,*procs_candidates = NULL; 5955 PetscInt xadj_count, *count; 5956 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5957 PetscSubcomm psubcomm; 5958 MPI_Comm subcomm; 5959 PetscErrorCode ierr; 5960 5961 PetscFunctionBegin; 5962 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5963 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5964 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5965 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5966 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5967 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5968 5969 if (have_void) *have_void = PETSC_FALSE; 5970 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5971 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5972 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5973 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5974 im_active = !!(n); 5975 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5976 void_procs = size - active_procs; 5977 /* get ranks of of non-active processes in mat communicator */ 5978 if (void_procs) { 5979 PetscInt ncand; 5980 5981 if (have_void) *have_void = PETSC_TRUE; 5982 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5983 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5984 for (i=0,ncand=0;i<size;i++) { 5985 if (!procs_candidates[i]) { 5986 procs_candidates[ncand++] = i; 5987 } 5988 } 5989 /* force n_subdomains to be not greater that the number of non-active processes */ 5990 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5991 } 5992 5993 /* number of subdomains requested greater than active processes -> just shift the matrix 5994 number of subdomains requested 1 -> send to master or first candidate in voids */ 5995 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5996 PetscInt issize,isidx,dest; 5997 if (*n_subdomains == 1) dest = 0; 5998 else dest = rank; 5999 if (im_active) { 6000 issize = 1; 6001 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6002 isidx = procs_candidates[dest]; 6003 } else { 6004 isidx = dest; 6005 } 6006 } else { 6007 issize = 0; 6008 isidx = -1; 6009 } 6010 *n_subdomains = active_procs; 6011 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6012 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6013 PetscFunctionReturn(0); 6014 } 6015 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6016 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6017 threshold = PetscMax(threshold,2); 6018 6019 /* Get info on mapping */ 6020 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6021 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6022 6023 /* build local CSR graph of subdomains' connectivity */ 6024 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6025 xadj[0] = 0; 6026 xadj[1] = PetscMax(n_neighs-1,0); 6027 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6028 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6029 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6030 for (i=1;i<n_neighs;i++) 6031 for (j=0;j<n_shared[i];j++) 6032 count[shared[i][j]] += 1; 6033 6034 xadj_count = 0; 6035 for (i=1;i<n_neighs;i++) { 6036 for (j=0;j<n_shared[i];j++) { 6037 if (count[shared[i][j]] < threshold) { 6038 adjncy[xadj_count] = neighs[i]; 6039 adjncy_wgt[xadj_count] = n_shared[i]; 6040 xadj_count++; 6041 break; 6042 } 6043 } 6044 } 6045 xadj[1] = xadj_count; 6046 ierr = PetscFree(count);CHKERRQ(ierr); 6047 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6048 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6049 6050 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6051 6052 /* Restrict work on active processes only */ 6053 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6054 if (void_procs) { 6055 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6056 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6057 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6058 subcomm = PetscSubcommChild(psubcomm); 6059 } else { 6060 psubcomm = NULL; 6061 subcomm = PetscObjectComm((PetscObject)mat); 6062 } 6063 6064 v_wgt = NULL; 6065 if (!color) { 6066 ierr = PetscFree(xadj);CHKERRQ(ierr); 6067 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6068 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6069 } else { 6070 Mat subdomain_adj; 6071 IS new_ranks,new_ranks_contig; 6072 MatPartitioning partitioner; 6073 PetscInt rstart=0,rend=0; 6074 PetscInt *is_indices,*oldranks; 6075 PetscMPIInt size; 6076 PetscBool aggregate; 6077 6078 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6079 if (void_procs) { 6080 PetscInt prank = rank; 6081 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6082 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6083 for (i=0;i<xadj[1];i++) { 6084 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6085 } 6086 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6087 } else { 6088 oldranks = NULL; 6089 } 6090 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6091 if (aggregate) { /* TODO: all this part could be made more efficient */ 6092 PetscInt lrows,row,ncols,*cols; 6093 PetscMPIInt nrank; 6094 PetscScalar *vals; 6095 6096 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6097 lrows = 0; 6098 if (nrank<redprocs) { 6099 lrows = size/redprocs; 6100 if (nrank<size%redprocs) lrows++; 6101 } 6102 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6103 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6104 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6105 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6106 row = nrank; 6107 ncols = xadj[1]-xadj[0]; 6108 cols = adjncy; 6109 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6110 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6111 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6112 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6113 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6114 ierr = PetscFree(xadj);CHKERRQ(ierr); 6115 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6116 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6117 ierr = PetscFree(vals);CHKERRQ(ierr); 6118 if (use_vwgt) { 6119 Vec v; 6120 const PetscScalar *array; 6121 PetscInt nl; 6122 6123 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6124 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6125 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6126 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6127 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6128 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6129 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6130 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6131 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6132 ierr = VecDestroy(&v);CHKERRQ(ierr); 6133 } 6134 } else { 6135 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6136 if (use_vwgt) { 6137 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6138 v_wgt[0] = local_size; 6139 } 6140 } 6141 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6142 6143 /* Partition */ 6144 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6145 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6146 if (v_wgt) { 6147 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6148 } 6149 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6150 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6151 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6152 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6153 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6154 6155 /* renumber new_ranks to avoid "holes" in new set of processors */ 6156 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6157 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6158 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6159 if (!aggregate) { 6160 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6161 #if defined(PETSC_USE_DEBUG) 6162 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6163 #endif 6164 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6165 } else if (oldranks) { 6166 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6167 } else { 6168 ranks_send_to_idx[0] = is_indices[0]; 6169 } 6170 } else { 6171 PetscInt idxs[1]; 6172 PetscMPIInt tag; 6173 MPI_Request *reqs; 6174 6175 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6176 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6177 for (i=rstart;i<rend;i++) { 6178 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6179 } 6180 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6181 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6182 ierr = PetscFree(reqs);CHKERRQ(ierr); 6183 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6184 #if defined(PETSC_USE_DEBUG) 6185 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6186 #endif 6187 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6188 } else if (oldranks) { 6189 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6190 } else { 6191 ranks_send_to_idx[0] = idxs[0]; 6192 } 6193 } 6194 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6195 /* clean up */ 6196 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6197 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6198 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6199 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6200 } 6201 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6202 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6203 6204 /* assemble parallel IS for sends */ 6205 i = 1; 6206 if (!color) i=0; 6207 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6208 PetscFunctionReturn(0); 6209 } 6210 6211 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6212 6213 #undef __FUNCT__ 6214 #define __FUNCT__ "MatISSubassemble" 6215 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6216 { 6217 Mat local_mat; 6218 IS is_sends_internal; 6219 PetscInt rows,cols,new_local_rows; 6220 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6221 PetscBool ismatis,isdense,newisdense,destroy_mat; 6222 ISLocalToGlobalMapping l2gmap; 6223 PetscInt* l2gmap_indices; 6224 const PetscInt* is_indices; 6225 MatType new_local_type; 6226 /* buffers */ 6227 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6228 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6229 PetscInt *recv_buffer_idxs_local; 6230 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6231 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6232 /* MPI */ 6233 MPI_Comm comm,comm_n; 6234 PetscSubcomm subcomm; 6235 PetscMPIInt n_sends,n_recvs,commsize; 6236 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6237 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6238 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6239 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6240 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6241 PetscErrorCode ierr; 6242 6243 PetscFunctionBegin; 6244 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6245 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6246 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6247 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6248 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6249 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6250 PetscValidLogicalCollectiveBool(mat,reuse,6); 6251 PetscValidLogicalCollectiveInt(mat,nis,8); 6252 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6253 if (nvecs) { 6254 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6255 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6256 } 6257 /* further checks */ 6258 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6259 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6260 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6261 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6262 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6263 if (reuse && *mat_n) { 6264 PetscInt mrows,mcols,mnrows,mncols; 6265 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6266 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6267 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6268 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6269 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6270 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6271 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6272 } 6273 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6274 PetscValidLogicalCollectiveInt(mat,bs,0); 6275 6276 /* prepare IS for sending if not provided */ 6277 if (!is_sends) { 6278 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6279 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6280 } else { 6281 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6282 is_sends_internal = is_sends; 6283 } 6284 6285 /* get comm */ 6286 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6287 6288 /* compute number of sends */ 6289 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6290 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6291 6292 /* compute number of receives */ 6293 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6294 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6295 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6296 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6297 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6298 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6299 ierr = PetscFree(iflags);CHKERRQ(ierr); 6300 6301 /* restrict comm if requested */ 6302 subcomm = 0; 6303 destroy_mat = PETSC_FALSE; 6304 if (restrict_comm) { 6305 PetscMPIInt color,subcommsize; 6306 6307 color = 0; 6308 if (restrict_full) { 6309 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6310 } else { 6311 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6312 } 6313 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6314 subcommsize = commsize - subcommsize; 6315 /* check if reuse has been requested */ 6316 if (reuse) { 6317 if (*mat_n) { 6318 PetscMPIInt subcommsize2; 6319 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6320 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6321 comm_n = PetscObjectComm((PetscObject)*mat_n); 6322 } else { 6323 comm_n = PETSC_COMM_SELF; 6324 } 6325 } else { /* MAT_INITIAL_MATRIX */ 6326 PetscMPIInt rank; 6327 6328 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6329 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6330 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6331 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6332 comm_n = PetscSubcommChild(subcomm); 6333 } 6334 /* flag to destroy *mat_n if not significative */ 6335 if (color) destroy_mat = PETSC_TRUE; 6336 } else { 6337 comm_n = comm; 6338 } 6339 6340 /* prepare send/receive buffers */ 6341 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6342 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6343 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6344 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6345 if (nis) { 6346 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6347 } 6348 6349 /* Get data from local matrices */ 6350 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6351 /* TODO: See below some guidelines on how to prepare the local buffers */ 6352 /* 6353 send_buffer_vals should contain the raw values of the local matrix 6354 send_buffer_idxs should contain: 6355 - MatType_PRIVATE type 6356 - PetscInt size_of_l2gmap 6357 - PetscInt global_row_indices[size_of_l2gmap] 6358 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6359 */ 6360 else { 6361 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6362 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6363 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6364 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6365 send_buffer_idxs[1] = i; 6366 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6367 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6368 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6369 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6370 for (i=0;i<n_sends;i++) { 6371 ilengths_vals[is_indices[i]] = len*len; 6372 ilengths_idxs[is_indices[i]] = len+2; 6373 } 6374 } 6375 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6376 /* additional is (if any) */ 6377 if (nis) { 6378 PetscMPIInt psum; 6379 PetscInt j; 6380 for (j=0,psum=0;j<nis;j++) { 6381 PetscInt plen; 6382 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6383 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6384 psum += len+1; /* indices + lenght */ 6385 } 6386 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6387 for (j=0,psum=0;j<nis;j++) { 6388 PetscInt plen; 6389 const PetscInt *is_array_idxs; 6390 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6391 send_buffer_idxs_is[psum] = plen; 6392 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6393 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6394 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6395 psum += plen+1; /* indices + lenght */ 6396 } 6397 for (i=0;i<n_sends;i++) { 6398 ilengths_idxs_is[is_indices[i]] = psum; 6399 } 6400 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6401 } 6402 6403 buf_size_idxs = 0; 6404 buf_size_vals = 0; 6405 buf_size_idxs_is = 0; 6406 buf_size_vecs = 0; 6407 for (i=0;i<n_recvs;i++) { 6408 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6409 buf_size_vals += (PetscInt)olengths_vals[i]; 6410 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6411 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6412 } 6413 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6414 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6415 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6416 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6417 6418 /* get new tags for clean communications */ 6419 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6420 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6421 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6422 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6423 6424 /* allocate for requests */ 6425 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6426 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6427 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6428 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6429 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6430 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6431 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6432 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6433 6434 /* communications */ 6435 ptr_idxs = recv_buffer_idxs; 6436 ptr_vals = recv_buffer_vals; 6437 ptr_idxs_is = recv_buffer_idxs_is; 6438 ptr_vecs = recv_buffer_vecs; 6439 for (i=0;i<n_recvs;i++) { 6440 source_dest = onodes[i]; 6441 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6442 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6443 ptr_idxs += olengths_idxs[i]; 6444 ptr_vals += olengths_vals[i]; 6445 if (nis) { 6446 source_dest = onodes_is[i]; 6447 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6448 ptr_idxs_is += olengths_idxs_is[i]; 6449 } 6450 if (nvecs) { 6451 source_dest = onodes[i]; 6452 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6453 ptr_vecs += olengths_idxs[i]-2; 6454 } 6455 } 6456 for (i=0;i<n_sends;i++) { 6457 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6458 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6459 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6460 if (nis) { 6461 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6462 } 6463 if (nvecs) { 6464 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6465 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6466 } 6467 } 6468 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6469 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6470 6471 /* assemble new l2g map */ 6472 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6473 ptr_idxs = recv_buffer_idxs; 6474 new_local_rows = 0; 6475 for (i=0;i<n_recvs;i++) { 6476 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6477 ptr_idxs += olengths_idxs[i]; 6478 } 6479 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6480 ptr_idxs = recv_buffer_idxs; 6481 new_local_rows = 0; 6482 for (i=0;i<n_recvs;i++) { 6483 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6484 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6485 ptr_idxs += olengths_idxs[i]; 6486 } 6487 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6488 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6489 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6490 6491 /* infer new local matrix type from received local matrices type */ 6492 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6493 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6494 if (n_recvs) { 6495 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6496 ptr_idxs = recv_buffer_idxs; 6497 for (i=0;i<n_recvs;i++) { 6498 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6499 new_local_type_private = MATAIJ_PRIVATE; 6500 break; 6501 } 6502 ptr_idxs += olengths_idxs[i]; 6503 } 6504 switch (new_local_type_private) { 6505 case MATDENSE_PRIVATE: 6506 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6507 new_local_type = MATSEQAIJ; 6508 bs = 1; 6509 } else { /* if I receive only 1 dense matrix */ 6510 new_local_type = MATSEQDENSE; 6511 bs = 1; 6512 } 6513 break; 6514 case MATAIJ_PRIVATE: 6515 new_local_type = MATSEQAIJ; 6516 bs = 1; 6517 break; 6518 case MATBAIJ_PRIVATE: 6519 new_local_type = MATSEQBAIJ; 6520 break; 6521 case MATSBAIJ_PRIVATE: 6522 new_local_type = MATSEQSBAIJ; 6523 break; 6524 default: 6525 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6526 break; 6527 } 6528 } else { /* by default, new_local_type is seqdense */ 6529 new_local_type = MATSEQDENSE; 6530 bs = 1; 6531 } 6532 6533 /* create MATIS object if needed */ 6534 if (!reuse) { 6535 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6536 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6537 } else { 6538 /* it also destroys the local matrices */ 6539 if (*mat_n) { 6540 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6541 } else { /* this is a fake object */ 6542 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6543 } 6544 } 6545 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6546 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6547 6548 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6549 6550 /* Global to local map of received indices */ 6551 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6552 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6553 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6554 6555 /* restore attributes -> type of incoming data and its size */ 6556 buf_size_idxs = 0; 6557 for (i=0;i<n_recvs;i++) { 6558 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6559 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6560 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6561 } 6562 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6563 6564 /* set preallocation */ 6565 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6566 if (!newisdense) { 6567 PetscInt *new_local_nnz=0; 6568 6569 ptr_idxs = recv_buffer_idxs_local; 6570 if (n_recvs) { 6571 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6572 } 6573 for (i=0;i<n_recvs;i++) { 6574 PetscInt j; 6575 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6576 for (j=0;j<*(ptr_idxs+1);j++) { 6577 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6578 } 6579 } else { 6580 /* TODO */ 6581 } 6582 ptr_idxs += olengths_idxs[i]; 6583 } 6584 if (new_local_nnz) { 6585 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6586 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6587 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6588 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6589 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6590 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6591 } else { 6592 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6593 } 6594 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6595 } else { 6596 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6597 } 6598 6599 /* set values */ 6600 ptr_vals = recv_buffer_vals; 6601 ptr_idxs = recv_buffer_idxs_local; 6602 for (i=0;i<n_recvs;i++) { 6603 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6604 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6605 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6606 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6607 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6608 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6609 } else { 6610 /* TODO */ 6611 } 6612 ptr_idxs += olengths_idxs[i]; 6613 ptr_vals += olengths_vals[i]; 6614 } 6615 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6616 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6617 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6618 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6619 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6620 6621 #if 0 6622 if (!restrict_comm) { /* check */ 6623 Vec lvec,rvec; 6624 PetscReal infty_error; 6625 6626 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6627 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6628 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6629 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6630 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6631 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6632 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6633 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6634 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6635 } 6636 #endif 6637 6638 /* assemble new additional is (if any) */ 6639 if (nis) { 6640 PetscInt **temp_idxs,*count_is,j,psum; 6641 6642 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6643 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6644 ptr_idxs = recv_buffer_idxs_is; 6645 psum = 0; 6646 for (i=0;i<n_recvs;i++) { 6647 for (j=0;j<nis;j++) { 6648 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6649 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6650 psum += plen; 6651 ptr_idxs += plen+1; /* shift pointer to received data */ 6652 } 6653 } 6654 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6655 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6656 for (i=1;i<nis;i++) { 6657 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6658 } 6659 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6660 ptr_idxs = recv_buffer_idxs_is; 6661 for (i=0;i<n_recvs;i++) { 6662 for (j=0;j<nis;j++) { 6663 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6664 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6665 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6666 ptr_idxs += plen+1; /* shift pointer to received data */ 6667 } 6668 } 6669 for (i=0;i<nis;i++) { 6670 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6671 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6672 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6673 } 6674 ierr = PetscFree(count_is);CHKERRQ(ierr); 6675 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6676 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6677 } 6678 /* free workspace */ 6679 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6680 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6681 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6682 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6683 if (isdense) { 6684 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6685 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6686 } else { 6687 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6688 } 6689 if (nis) { 6690 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6691 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6692 } 6693 6694 if (nvecs) { 6695 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6696 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6697 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6698 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6699 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6700 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6701 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6702 /* set values */ 6703 ptr_vals = recv_buffer_vecs; 6704 ptr_idxs = recv_buffer_idxs_local; 6705 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6706 for (i=0;i<n_recvs;i++) { 6707 PetscInt j; 6708 for (j=0;j<*(ptr_idxs+1);j++) { 6709 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6710 } 6711 ptr_idxs += olengths_idxs[i]; 6712 ptr_vals += olengths_idxs[i]-2; 6713 } 6714 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6715 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6716 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6717 } 6718 6719 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6720 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6721 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6722 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6723 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6724 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6725 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6726 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6727 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6728 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6729 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6730 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6731 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6732 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6733 ierr = PetscFree(onodes);CHKERRQ(ierr); 6734 if (nis) { 6735 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6736 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6737 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6738 } 6739 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6740 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6741 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6742 for (i=0;i<nis;i++) { 6743 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6744 } 6745 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6746 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6747 } 6748 *mat_n = NULL; 6749 } 6750 PetscFunctionReturn(0); 6751 } 6752 6753 /* temporary hack into ksp private data structure */ 6754 #include <petsc/private/kspimpl.h> 6755 6756 #undef __FUNCT__ 6757 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6758 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6759 { 6760 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6761 PC_IS *pcis = (PC_IS*)pc->data; 6762 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6763 Mat coarsedivudotp = NULL; 6764 MatNullSpace CoarseNullSpace = NULL; 6765 ISLocalToGlobalMapping coarse_islg; 6766 IS coarse_is,*isarray; 6767 PetscInt i,im_active=-1,active_procs=-1; 6768 PetscInt nis,nisdofs,nisneu,nisvert; 6769 PC pc_temp; 6770 PCType coarse_pc_type; 6771 KSPType coarse_ksp_type; 6772 PetscBool multilevel_requested,multilevel_allowed; 6773 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6774 Mat t_coarse_mat_is; 6775 PetscInt ncoarse; 6776 PetscBool compute_vecs = PETSC_FALSE; 6777 PetscScalar *array; 6778 MatReuse coarse_mat_reuse; 6779 PetscBool restr, full_restr, have_void; 6780 PetscErrorCode ierr; 6781 6782 PetscFunctionBegin; 6783 /* Assign global numbering to coarse dofs */ 6784 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6785 PetscInt ocoarse_size; 6786 compute_vecs = PETSC_TRUE; 6787 ocoarse_size = pcbddc->coarse_size; 6788 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6789 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6790 /* see if we can avoid some work */ 6791 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6792 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6793 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6794 PC pc; 6795 PetscBool isbddc; 6796 6797 /* temporary workaround since PCBDDC does not have a reset method so far */ 6798 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6799 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6800 if (isbddc) { 6801 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6802 } else { 6803 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6804 } 6805 coarse_reuse = PETSC_FALSE; 6806 } else { /* we can safely reuse already computed coarse matrix */ 6807 coarse_reuse = PETSC_TRUE; 6808 } 6809 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6810 coarse_reuse = PETSC_FALSE; 6811 } 6812 /* reset any subassembling information */ 6813 if (!coarse_reuse || pcbddc->recompute_topography) { 6814 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6815 } 6816 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6817 coarse_reuse = PETSC_TRUE; 6818 } 6819 /* assemble coarse matrix */ 6820 if (coarse_reuse && pcbddc->coarse_ksp) { 6821 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6822 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6823 coarse_mat_reuse = MAT_REUSE_MATRIX; 6824 } else { 6825 coarse_mat = NULL; 6826 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6827 } 6828 6829 /* creates temporary l2gmap and IS for coarse indexes */ 6830 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6831 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6832 6833 /* creates temporary MATIS object for coarse matrix */ 6834 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6835 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6836 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6837 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6838 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6839 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6840 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6841 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6842 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6843 6844 /* count "active" (i.e. with positive local size) and "void" processes */ 6845 im_active = !!(pcis->n); 6846 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6847 6848 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6849 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6850 /* full_restr : just use the receivers from the subassembling pattern */ 6851 coarse_mat_is = NULL; 6852 multilevel_allowed = PETSC_FALSE; 6853 multilevel_requested = PETSC_FALSE; 6854 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6855 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6856 if (multilevel_requested) { 6857 ncoarse = active_procs/pcbddc->coarsening_ratio; 6858 restr = PETSC_FALSE; 6859 full_restr = PETSC_FALSE; 6860 } else { 6861 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6862 restr = PETSC_TRUE; 6863 full_restr = PETSC_TRUE; 6864 } 6865 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6866 ncoarse = PetscMax(1,ncoarse); 6867 if (!pcbddc->coarse_subassembling) { 6868 if (pcbddc->coarsening_ratio > 1) { 6869 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6870 } else { 6871 PetscMPIInt size,rank; 6872 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6873 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6874 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6875 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6876 } 6877 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6878 PetscInt psum; 6879 PetscMPIInt size; 6880 if (pcbddc->coarse_ksp) psum = 1; 6881 else psum = 0; 6882 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6883 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6884 if (ncoarse < size) have_void = PETSC_TRUE; 6885 } 6886 /* determine if we can go multilevel */ 6887 if (multilevel_requested) { 6888 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6889 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6890 } 6891 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6892 6893 /* dump subassembling pattern */ 6894 if (pcbddc->dbg_flag && multilevel_allowed) { 6895 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6896 } 6897 6898 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6899 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6900 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6901 const PetscInt *idxs; 6902 ISLocalToGlobalMapping tmap; 6903 6904 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6905 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6906 /* allocate space for temporary storage */ 6907 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6908 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6909 /* allocate for IS array */ 6910 nisdofs = pcbddc->n_ISForDofsLocal; 6911 nisneu = !!pcbddc->NeumannBoundariesLocal; 6912 nisvert = 0; /* nisvert is not used */ 6913 nis = nisdofs + nisneu + nisvert; 6914 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6915 /* dofs splitting */ 6916 for (i=0;i<nisdofs;i++) { 6917 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6918 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6919 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6920 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6921 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6922 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6923 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6924 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6925 } 6926 /* neumann boundaries */ 6927 if (pcbddc->NeumannBoundariesLocal) { 6928 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6929 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6930 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6931 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6932 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6933 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6934 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6935 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6936 } 6937 /* free memory */ 6938 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6939 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6940 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6941 } else { 6942 nis = 0; 6943 nisdofs = 0; 6944 nisneu = 0; 6945 nisvert = 0; 6946 isarray = NULL; 6947 } 6948 /* destroy no longer needed map */ 6949 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6950 6951 /* subassemble */ 6952 if (multilevel_allowed) { 6953 Vec vp[1]; 6954 PetscInt nvecs = 0; 6955 PetscBool reuse,reuser; 6956 6957 if (coarse_mat) reuse = PETSC_TRUE; 6958 else reuse = PETSC_FALSE; 6959 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6960 vp[0] = NULL; 6961 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6962 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6963 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6964 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6965 nvecs = 1; 6966 6967 if (pcbddc->divudotp) { 6968 Mat B,loc_divudotp; 6969 Vec v,p; 6970 IS dummy; 6971 PetscInt np; 6972 6973 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6974 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6975 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6976 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6977 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6978 ierr = VecSet(p,1.);CHKERRQ(ierr); 6979 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6980 ierr = VecDestroy(&p);CHKERRQ(ierr); 6981 ierr = MatDestroy(&B);CHKERRQ(ierr); 6982 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6983 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6984 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6985 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6986 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6987 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6988 ierr = VecDestroy(&v);CHKERRQ(ierr); 6989 } 6990 } 6991 if (reuser) { 6992 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6993 } else { 6994 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6995 } 6996 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6997 PetscScalar *arraym,*arrayv; 6998 PetscInt nl; 6999 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7000 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7001 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7002 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7003 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7004 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7005 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7006 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7007 } else { 7008 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7009 } 7010 } else { 7011 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 7012 } 7013 if (coarse_mat_is || coarse_mat) { 7014 PetscMPIInt size; 7015 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7016 if (!multilevel_allowed) { 7017 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7018 } else { 7019 Mat A; 7020 7021 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7022 if (coarse_mat_is) { 7023 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7024 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7025 coarse_mat = coarse_mat_is; 7026 } 7027 /* be sure we don't have MatSeqDENSE as local mat */ 7028 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7029 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7030 } 7031 } 7032 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7033 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7034 7035 /* create local to global scatters for coarse problem */ 7036 if (compute_vecs) { 7037 PetscInt lrows; 7038 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7039 if (coarse_mat) { 7040 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7041 } else { 7042 lrows = 0; 7043 } 7044 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7045 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7046 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7047 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7048 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7049 } 7050 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7051 7052 /* set defaults for coarse KSP and PC */ 7053 if (multilevel_allowed) { 7054 coarse_ksp_type = KSPRICHARDSON; 7055 coarse_pc_type = PCBDDC; 7056 } else { 7057 coarse_ksp_type = KSPPREONLY; 7058 coarse_pc_type = PCREDUNDANT; 7059 } 7060 7061 /* print some info if requested */ 7062 if (pcbddc->dbg_flag) { 7063 if (!multilevel_allowed) { 7064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7065 if (multilevel_requested) { 7066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7067 } else if (pcbddc->max_levels) { 7068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7069 } 7070 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7071 } 7072 } 7073 7074 /* create the coarse KSP object only once with defaults */ 7075 if (coarse_mat) { 7076 PetscViewer dbg_viewer = NULL; 7077 if (pcbddc->dbg_flag) { 7078 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7079 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7080 } 7081 if (!pcbddc->coarse_ksp) { 7082 char prefix[256],str_level[16]; 7083 size_t len; 7084 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7085 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7086 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7087 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7088 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7089 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7090 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7091 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7092 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7093 /* prefix */ 7094 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7095 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7096 if (!pcbddc->current_level) { 7097 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7098 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7099 } else { 7100 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7101 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7102 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7103 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7104 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7105 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7106 } 7107 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7108 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7109 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7110 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7111 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7112 /* allow user customization */ 7113 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7114 } 7115 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7116 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7117 if (nisdofs) { 7118 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7119 for (i=0;i<nisdofs;i++) { 7120 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7121 } 7122 } 7123 if (nisneu) { 7124 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7125 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7126 } 7127 if (nisvert) { 7128 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7129 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7130 } 7131 7132 /* get some info after set from options */ 7133 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7134 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7135 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7136 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7137 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7138 isbddc = PETSC_FALSE; 7139 } 7140 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7141 if (isredundant) { 7142 KSP inner_ksp; 7143 PC inner_pc; 7144 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7145 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7146 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7147 } 7148 7149 /* parameters which miss an API */ 7150 if (isbddc) { 7151 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7152 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7153 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7154 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7155 if (pcbddc_coarse->benign_saddle_point) { 7156 Mat coarsedivudotp_is; 7157 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7158 IS row,col; 7159 const PetscInt *gidxs; 7160 PetscInt n,st,M,N; 7161 7162 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7163 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7164 st = st-n; 7165 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7166 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7167 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7168 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7169 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7170 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7171 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7172 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7173 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7174 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7175 ierr = ISDestroy(&row);CHKERRQ(ierr); 7176 ierr = ISDestroy(&col);CHKERRQ(ierr); 7177 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7178 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7179 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7180 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7181 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7182 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7183 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7184 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7185 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7186 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7187 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7188 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7189 } 7190 } 7191 7192 /* propagate symmetry info of coarse matrix */ 7193 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7194 if (pc->pmat->symmetric_set) { 7195 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7196 } 7197 if (pc->pmat->hermitian_set) { 7198 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7199 } 7200 if (pc->pmat->spd_set) { 7201 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7202 } 7203 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7204 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7205 } 7206 /* set operators */ 7207 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7208 if (pcbddc->dbg_flag) { 7209 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7210 } 7211 } 7212 ierr = PetscFree(isarray);CHKERRQ(ierr); 7213 #if 0 7214 { 7215 PetscViewer viewer; 7216 char filename[256]; 7217 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7218 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7219 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7220 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7221 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7222 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7223 } 7224 #endif 7225 7226 if (pcbddc->coarse_ksp) { 7227 Vec crhs,csol; 7228 7229 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7230 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7231 if (!csol) { 7232 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7233 } 7234 if (!crhs) { 7235 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7236 } 7237 } 7238 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7239 7240 /* compute null space for coarse solver if the benign trick has been requested */ 7241 if (pcbddc->benign_null) { 7242 7243 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7244 for (i=0;i<pcbddc->benign_n;i++) { 7245 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7246 } 7247 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7248 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7249 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7250 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7251 if (coarse_mat) { 7252 Vec nullv; 7253 PetscScalar *array,*array2; 7254 PetscInt nl; 7255 7256 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7257 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7258 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7259 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7260 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7261 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7262 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7263 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7264 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7265 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7266 } 7267 } 7268 7269 if (pcbddc->coarse_ksp) { 7270 PetscBool ispreonly; 7271 7272 if (CoarseNullSpace) { 7273 PetscBool isnull; 7274 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7275 if (isnull) { 7276 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7277 } 7278 /* TODO: add local nullspaces (if any) */ 7279 } 7280 /* setup coarse ksp */ 7281 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7282 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7283 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7284 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7285 KSP check_ksp; 7286 KSPType check_ksp_type; 7287 PC check_pc; 7288 Vec check_vec,coarse_vec; 7289 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7290 PetscInt its; 7291 PetscBool compute_eigs; 7292 PetscReal *eigs_r,*eigs_c; 7293 PetscInt neigs; 7294 const char *prefix; 7295 7296 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7297 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7298 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7299 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7300 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7301 /* prevent from setup unneeded object */ 7302 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7303 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7304 if (ispreonly) { 7305 check_ksp_type = KSPPREONLY; 7306 compute_eigs = PETSC_FALSE; 7307 } else { 7308 check_ksp_type = KSPGMRES; 7309 compute_eigs = PETSC_TRUE; 7310 } 7311 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7312 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7313 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7314 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7315 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7316 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7317 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7318 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7319 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7320 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7321 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7322 /* create random vec */ 7323 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7324 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7325 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7326 /* solve coarse problem */ 7327 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7328 /* set eigenvalue estimation if preonly has not been requested */ 7329 if (compute_eigs) { 7330 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7331 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7332 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7333 if (neigs) { 7334 lambda_max = eigs_r[neigs-1]; 7335 lambda_min = eigs_r[0]; 7336 if (pcbddc->use_coarse_estimates) { 7337 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7338 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7339 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7340 } 7341 } 7342 } 7343 } 7344 7345 /* check coarse problem residual error */ 7346 if (pcbddc->dbg_flag) { 7347 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7348 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7349 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7350 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7351 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7352 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7353 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7354 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7355 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7356 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7357 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7358 if (CoarseNullSpace) { 7359 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7360 } 7361 if (compute_eigs) { 7362 PetscReal lambda_max_s,lambda_min_s; 7363 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7364 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7365 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7366 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7367 for (i=0;i<neigs;i++) { 7368 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7369 } 7370 } 7371 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7372 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7373 } 7374 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7375 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7376 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7377 if (compute_eigs) { 7378 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7379 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7380 } 7381 } 7382 } 7383 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7384 /* print additional info */ 7385 if (pcbddc->dbg_flag) { 7386 /* waits until all processes reaches this point */ 7387 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7388 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7389 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7390 } 7391 7392 /* free memory */ 7393 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7394 PetscFunctionReturn(0); 7395 } 7396 7397 #undef __FUNCT__ 7398 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7399 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7400 { 7401 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7402 PC_IS* pcis = (PC_IS*)pc->data; 7403 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7404 IS subset,subset_mult,subset_n; 7405 PetscInt local_size,coarse_size=0; 7406 PetscInt *local_primal_indices=NULL; 7407 const PetscInt *t_local_primal_indices; 7408 PetscErrorCode ierr; 7409 7410 PetscFunctionBegin; 7411 /* Compute global number of coarse dofs */ 7412 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7413 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7414 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7415 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7416 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7417 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7418 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7419 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7420 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7421 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7422 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7423 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7424 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7425 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7426 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7427 7428 /* check numbering */ 7429 if (pcbddc->dbg_flag) { 7430 PetscScalar coarsesum,*array,*array2; 7431 PetscInt i; 7432 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7433 7434 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7435 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7436 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7437 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7438 /* counter */ 7439 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7440 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7441 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7442 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7443 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7444 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7445 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7446 for (i=0;i<pcbddc->local_primal_size;i++) { 7447 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7448 } 7449 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7450 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7451 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7452 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7453 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7454 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7455 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7456 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7457 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7458 for (i=0;i<pcis->n;i++) { 7459 if (array[i] != 0.0 && array[i] != array2[i]) { 7460 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7461 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7462 set_error = PETSC_TRUE; 7463 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7464 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7465 } 7466 } 7467 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7468 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7469 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7470 for (i=0;i<pcis->n;i++) { 7471 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7472 } 7473 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7474 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7475 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7476 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7477 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7478 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7479 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7480 PetscInt *gidxs; 7481 7482 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7483 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7484 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7486 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7487 for (i=0;i<pcbddc->local_primal_size;i++) { 7488 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7489 } 7490 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7491 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7492 } 7493 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7494 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7495 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7496 } 7497 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7498 /* get back data */ 7499 *coarse_size_n = coarse_size; 7500 *local_primal_indices_n = local_primal_indices; 7501 PetscFunctionReturn(0); 7502 } 7503 7504 #undef __FUNCT__ 7505 #define __FUNCT__ "PCBDDCGlobalToLocal" 7506 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7507 { 7508 IS localis_t; 7509 PetscInt i,lsize,*idxs,n; 7510 PetscScalar *vals; 7511 PetscErrorCode ierr; 7512 7513 PetscFunctionBegin; 7514 /* get indices in local ordering exploiting local to global map */ 7515 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7516 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7517 for (i=0;i<lsize;i++) vals[i] = 1.0; 7518 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7519 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7520 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7521 if (idxs) { /* multilevel guard */ 7522 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7523 } 7524 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7525 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7526 ierr = PetscFree(vals);CHKERRQ(ierr); 7527 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7528 /* now compute set in local ordering */ 7529 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7530 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7531 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7532 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7533 for (i=0,lsize=0;i<n;i++) { 7534 if (PetscRealPart(vals[i]) > 0.5) { 7535 lsize++; 7536 } 7537 } 7538 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7539 for (i=0,lsize=0;i<n;i++) { 7540 if (PetscRealPart(vals[i]) > 0.5) { 7541 idxs[lsize++] = i; 7542 } 7543 } 7544 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7545 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7546 *localis = localis_t; 7547 PetscFunctionReturn(0); 7548 } 7549 7550 #undef __FUNCT__ 7551 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7552 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7553 { 7554 PC_IS *pcis=(PC_IS*)pc->data; 7555 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7556 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7557 Mat S_j; 7558 PetscInt *used_xadj,*used_adjncy; 7559 PetscBool free_used_adj; 7560 PetscErrorCode ierr; 7561 7562 PetscFunctionBegin; 7563 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7564 free_used_adj = PETSC_FALSE; 7565 if (pcbddc->sub_schurs_layers == -1) { 7566 used_xadj = NULL; 7567 used_adjncy = NULL; 7568 } else { 7569 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7570 used_xadj = pcbddc->mat_graph->xadj; 7571 used_adjncy = pcbddc->mat_graph->adjncy; 7572 } else if (pcbddc->computed_rowadj) { 7573 used_xadj = pcbddc->mat_graph->xadj; 7574 used_adjncy = pcbddc->mat_graph->adjncy; 7575 } else { 7576 PetscBool flg_row=PETSC_FALSE; 7577 const PetscInt *xadj,*adjncy; 7578 PetscInt nvtxs; 7579 7580 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7581 if (flg_row) { 7582 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7583 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7584 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7585 free_used_adj = PETSC_TRUE; 7586 } else { 7587 pcbddc->sub_schurs_layers = -1; 7588 used_xadj = NULL; 7589 used_adjncy = NULL; 7590 } 7591 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7592 } 7593 } 7594 7595 /* setup sub_schurs data */ 7596 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7597 if (!sub_schurs->schur_explicit) { 7598 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7599 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7600 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7601 } else { 7602 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7603 PetscBool isseqaij,need_change = PETSC_FALSE; 7604 PetscInt benign_n; 7605 Mat change = NULL; 7606 Vec scaling = NULL; 7607 IS change_primal = NULL; 7608 7609 if (!pcbddc->use_vertices && reuse_solvers) { 7610 PetscInt n_vertices; 7611 7612 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7613 reuse_solvers = (PetscBool)!n_vertices; 7614 } 7615 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7616 if (!isseqaij) { 7617 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7618 if (matis->A == pcbddc->local_mat) { 7619 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7620 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7621 } else { 7622 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7623 } 7624 } 7625 if (!pcbddc->benign_change_explicit) { 7626 benign_n = pcbddc->benign_n; 7627 } else { 7628 benign_n = 0; 7629 } 7630 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7631 We need a global reduction to avoid possible deadlocks. 7632 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7633 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7634 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7635 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7636 need_change = (PetscBool)(!need_change); 7637 } 7638 /* If the user defines additional constraints, we import them here. 7639 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7640 if (need_change) { 7641 PC_IS *pcisf; 7642 PC_BDDC *pcbddcf; 7643 PC pcf; 7644 7645 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7646 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7647 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7648 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7649 /* hacks */ 7650 pcisf = (PC_IS*)pcf->data; 7651 pcisf->is_B_local = pcis->is_B_local; 7652 pcisf->vec1_N = pcis->vec1_N; 7653 pcisf->BtoNmap = pcis->BtoNmap; 7654 pcisf->n = pcis->n; 7655 pcisf->n_B = pcis->n_B; 7656 pcbddcf = (PC_BDDC*)pcf->data; 7657 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7658 pcbddcf->mat_graph = pcbddc->mat_graph; 7659 pcbddcf->use_faces = PETSC_TRUE; 7660 pcbddcf->use_change_of_basis = PETSC_TRUE; 7661 pcbddcf->use_change_on_faces = PETSC_TRUE; 7662 pcbddcf->use_qr_single = PETSC_TRUE; 7663 pcbddcf->fake_change = PETSC_TRUE; 7664 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7665 /* store information on primal vertices and change of basis (in local numbering) */ 7666 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7667 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7668 change = pcbddcf->ConstraintMatrix; 7669 pcbddcf->ConstraintMatrix = NULL; 7670 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7671 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7672 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7673 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7674 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7675 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7676 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7677 pcf->ops->destroy = NULL; 7678 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7679 } 7680 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7681 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7682 ierr = MatDestroy(&change);CHKERRQ(ierr); 7683 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7684 } 7685 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7686 7687 /* free adjacency */ 7688 if (free_used_adj) { 7689 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7690 } 7691 PetscFunctionReturn(0); 7692 } 7693 7694 #undef __FUNCT__ 7695 #define __FUNCT__ "PCBDDCInitSubSchurs" 7696 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7697 { 7698 PC_IS *pcis=(PC_IS*)pc->data; 7699 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7700 PCBDDCGraph graph; 7701 PetscErrorCode ierr; 7702 7703 PetscFunctionBegin; 7704 /* attach interface graph for determining subsets */ 7705 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7706 IS verticesIS,verticescomm; 7707 PetscInt vsize,*idxs; 7708 7709 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7710 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7711 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7712 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7713 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7714 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7715 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7716 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7717 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7718 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7719 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7720 } else { 7721 graph = pcbddc->mat_graph; 7722 } 7723 /* print some info */ 7724 if (pcbddc->dbg_flag) { 7725 IS vertices; 7726 PetscInt nv,nedges,nfaces; 7727 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7728 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7729 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7730 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7731 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7732 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7733 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7734 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7735 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7736 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7737 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7738 } 7739 7740 /* sub_schurs init */ 7741 if (!pcbddc->sub_schurs) { 7742 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7743 } 7744 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7745 7746 /* free graph struct */ 7747 if (pcbddc->sub_schurs_rebuild) { 7748 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7749 } 7750 PetscFunctionReturn(0); 7751 } 7752 7753 #undef __FUNCT__ 7754 #define __FUNCT__ "PCBDDCCheckOperator" 7755 PetscErrorCode PCBDDCCheckOperator(PC pc) 7756 { 7757 PC_IS *pcis=(PC_IS*)pc->data; 7758 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7759 PetscErrorCode ierr; 7760 7761 PetscFunctionBegin; 7762 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7763 IS zerodiag = NULL; 7764 Mat S_j,B0_B=NULL; 7765 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7766 PetscScalar *p0_check,*array,*array2; 7767 PetscReal norm; 7768 PetscInt i; 7769 7770 /* B0 and B0_B */ 7771 if (zerodiag) { 7772 IS dummy; 7773 7774 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7775 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7776 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7777 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7778 } 7779 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7780 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7781 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7782 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7783 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7784 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7785 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7786 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7787 /* S_j */ 7788 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7789 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7790 7791 /* mimic vector in \widetilde{W}_\Gamma */ 7792 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7793 /* continuous in primal space */ 7794 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7795 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7796 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7797 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7798 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7799 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7800 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7801 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7802 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7803 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7804 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7805 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7806 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7807 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7808 7809 /* assemble rhs for coarse problem */ 7810 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7811 /* local with Schur */ 7812 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7813 if (zerodiag) { 7814 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7815 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7816 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7817 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7818 } 7819 /* sum on primal nodes the local contributions */ 7820 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7821 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7822 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7823 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7824 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7825 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7826 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7827 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7828 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7829 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7830 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7831 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7832 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7833 /* scale primal nodes (BDDC sums contibutions) */ 7834 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7835 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7836 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7837 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7838 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7839 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7840 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7841 /* global: \widetilde{B0}_B w_\Gamma */ 7842 if (zerodiag) { 7843 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7844 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7845 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7846 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7847 } 7848 /* BDDC */ 7849 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7850 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7851 7852 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7853 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7854 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7855 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7856 for (i=0;i<pcbddc->benign_n;i++) { 7857 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7858 } 7859 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7860 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7861 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7862 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7863 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7864 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7865 } 7866 PetscFunctionReturn(0); 7867 } 7868