1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 if (!esize) PetscFunctionReturn(0); 84 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 85 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 86 87 /* gradients */ 88 ptr = work + 5*esize; 89 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 90 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 91 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 92 ierr = MatDestroy(&GE);CHKERRQ(ierr); 93 94 /* constants */ 95 ptr += rsize*csize; 96 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 97 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 99 ierr = MatDestroy(&GE);CHKERRQ(ierr); 100 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 101 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PCBDDCNedelecSupport" 107 PetscErrorCode PCBDDCNedelecSupport(PC pc) 108 { 109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 110 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 111 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 112 MatNullSpace nnsp; 113 Vec tvec,*quads; 114 PetscSF sfv; 115 ISLocalToGlobalMapping el2g,vl2g,fl2g; 116 MPI_Comm comm; 117 IS lned,primals,allprimals,nedfieldlocal; 118 IS *eedges,*extrows,*extcols,*alleedges; 119 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee; 120 PetscScalar *vals,*work; 121 PetscReal *rwork; 122 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 123 PetscInt ne,nv,Lv,order,n,field; 124 PetscInt n_neigh,*neigh,*n_shared,**shared; 125 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscBool print,eerr,done,lrc[2],conforming; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* test variable order code and print debug info TODO: to be removed */ 133 print = PETSC_FALSE; 134 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 135 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 136 137 /* Return to caller if there are no edges in the decomposition */ 138 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 139 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 140 ierr = ISLocalToGlobalMappingGetSize(el2g,&n);CHKERRQ(ierr); 141 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 142 lrc[0] = PETSC_FALSE; 143 for (i=0;i<n;i++) { 144 if (PetscRealPart(vals[i]) > 2.) { 145 lrc[0] = PETSC_TRUE; 146 break; 147 } 148 } 149 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 150 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 151 if (!lrc[1]) PetscFunctionReturn(0); 152 153 /* Get discrete gradient 154 If it is defined for a subset of dofs, assumes G is given in global ordering for all the dofs */ 155 G = pcbddc->discretegradient; 156 order = pcbddc->nedorder; 157 conforming = pcbddc->conforming; 158 field = pcbddc->nedfield; 159 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 160 if (pcbddc->n_ISForDofsLocal && field > -1) { 161 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 162 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 163 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 164 } else { 165 PetscBool testnedfield = PETSC_FALSE; 166 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 167 if (!testnedfield) { 168 ne = n; 169 nedfieldlocal = NULL; 170 } else { 171 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 172 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 173 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 174 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 175 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 176 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 177 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 178 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 179 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 180 for (i=0,cum=0;i<n;i++) { 181 if (matis->sf_leafdata[i] > 1) { 182 matis->sf_leafdata[cum++] = i; 183 } 184 } 185 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 186 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 187 } 188 } 189 190 if (nedfieldlocal) { /* merge with previous code when testing is done */ 191 IS is; 192 193 /* need to map from the local Nedelec field to local numbering */ 194 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 195 /* need to map from the local Nedelec field to global numbering */ 196 ierr = ISLocalToGlobalMappingApplyIS(el2g,nedfieldlocal,&is);CHKERRQ(ierr); 197 ierr = ISLocalToGlobalMappingCreateIS(is,&el2g);CHKERRQ(ierr); 198 ierr = ISDestroy(&is);CHKERRQ(ierr); 199 } else { 200 ierr = PetscObjectReference((PetscObject)el2g);CHKERRQ(ierr); 201 fl2g = NULL; 202 } 203 204 /* Sanity checks */ 205 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 206 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 207 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 208 209 /* Drop connections for interior edges (this modifies G) */ 210 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 211 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 212 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 213 if (nedfieldlocal) { 214 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 215 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 216 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 217 } else { 218 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 219 } 220 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 221 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 222 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 223 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 224 if (matis->sf_rootdata[i] < 2) { 225 matis->sf_rootdata[cum++] = i + rst; 226 } 227 } 228 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 229 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 230 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 231 232 /* Extract subdomain relevant rows of G */ 233 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 234 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 235 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 236 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 237 ierr = ISDestroy(&lned);CHKERRQ(ierr); 238 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 239 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 240 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 241 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 242 if (print) { 243 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 244 ierr = MatView(lG,NULL);CHKERRQ(ierr); 245 } 246 247 /* SF for nodal communications */ 248 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 249 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 250 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 251 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 252 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 253 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 254 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 257 258 /* Destroy temporary G created in MATIS format */ 259 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 260 261 /* Save lG */ 262 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 263 264 /* Analyze the edge-nodes connections (duplicate lG) */ 265 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 266 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 267 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 268 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 269 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 270 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 271 /* need to import the boundary specification to ensure the 272 proper detection of coarse edges' endpoints */ 273 if (pcbddc->DirichletBoundariesLocal) { 274 IS is; 275 276 if (fl2g) { 277 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 278 } else { 279 is = pcbddc->DirichletBoundariesLocal; 280 } 281 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 282 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 283 for (i=0;i<cum;i++) { 284 if (idxs[i] >= 0) { 285 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 286 } 287 } 288 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 289 if (fl2g) { 290 ierr = ISDestroy(&is);CHKERRQ(ierr); 291 } 292 } 293 if (pcbddc->NeumannBoundariesLocal) { 294 IS is; 295 296 if (fl2g) { 297 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 298 } else { 299 is = pcbddc->NeumannBoundariesLocal; 300 } 301 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 302 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 303 for (i=0;i<cum;i++) { 304 if (idxs[i] >= 0) { 305 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 306 } 307 } 308 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 309 if (fl2g) { 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } 312 } 313 314 /* need to remove coarse faces' dofs to ensure the 315 proper detection of coarse edges' endpoints */ 316 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 317 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 318 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 319 for (i=1;i<n_neigh;i++) 320 for (j=0;j<n_shared[i];j++) 321 marks[shared[i][j]]++; 322 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) { 324 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 325 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 326 } 327 } 328 329 if (!conforming) { 330 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 331 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 332 } 333 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 334 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 335 cum = 0; 336 for (i=0;i<ne;i++) { 337 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 338 if (!PetscBTLookup(btee,i)) { 339 marks[cum++] = i; 340 continue; 341 } 342 /* set badly connected edge dofs as primal */ 343 if (!conforming) { 344 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 345 marks[cum++] = i; 346 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 347 for (j=ii[i];j<ii[i+1];j++) { 348 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 349 } 350 } else { 351 /* every edge dofs should be connected trough a certain number of nodal dofs 352 to other edge dofs belonging to coarse edges 353 - at most 2 endpoints 354 - order-1 interior nodal dofs 355 - no undefined nodal dofs (nconn < order) 356 */ 357 PetscInt ends = 0,ints = 0, undef = 0; 358 for (j=ii[i];j<ii[i+1];j++) { 359 PetscInt v = jj[j],k; 360 PetscInt nconn = iit[v+1]-iit[v]; 361 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 362 if (nconn > order) ends++; 363 else if (nconn == order) ints++; 364 else undef++; 365 } 366 if (undef || ends > 2 || ints != order -1) { 367 marks[cum++] = i; 368 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 369 for (j=ii[i];j<ii[i+1];j++) { 370 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 371 } 372 } 373 } 374 } 375 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 376 if (!order && ii[i+1] != ii[i]) { 377 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 378 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 379 } 380 } 381 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 382 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 383 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 384 if (!conforming) { 385 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 386 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 387 } 388 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 389 /* identify splitpoints and corner candidates: TODO variable order */ 390 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 391 if (print) { 392 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 393 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 394 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 395 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 396 } 397 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 398 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 399 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 400 for (i=0;i<nv;i++) { 401 PetscInt ord = order, test = ii[i+1]-ii[i]; 402 if (!order) { 403 PetscReal vorder = 0.; 404 405 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 406 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 407 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 408 ord = 1; 409 } 410 #if defined(PETSC_USE_DEBUG) 411 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 412 #endif 413 if (test >= 3*ord) { /* splitpoints */ 414 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 415 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 416 } else if (test == ord) { 417 if (order == 1) { 418 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 419 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 420 } else { 421 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 422 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 423 } 424 } 425 } 426 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 427 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 428 429 /* Get the local G^T explicitly */ 430 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 431 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 432 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 433 434 /* Mark interior nodal dofs */ 435 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 436 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 437 for (i=1;i<n_neigh;i++) { 438 for (j=0;j<n_shared[i];j++) { 439 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 440 } 441 } 442 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 443 444 /* communicate corners and splitpoints */ 445 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 446 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 447 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 448 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 449 450 if (print) { 451 IS tbz; 452 453 cum = 0; 454 for (i=0;i<nv;i++) 455 if (sfvleaves[i]) 456 vmarks[cum++] = i; 457 458 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 459 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 460 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 461 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 462 } 463 464 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 465 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 466 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 467 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 468 469 /* Zero rows of lGt corresponding to identified corners 470 and interior nodal dofs */ 471 cum = 0; 472 for (i=0;i<nv;i++) { 473 if (sfvleaves[i]) { 474 vmarks[cum++] = i; 475 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 476 } 477 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 478 } 479 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 480 if (print) { 481 IS tbz; 482 483 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 484 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 485 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 486 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 487 } 488 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 489 ierr = PetscFree(vmarks);CHKERRQ(ierr); 490 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 491 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 492 493 /* Recompute G */ 494 ierr = MatDestroy(&lG);CHKERRQ(ierr); 495 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 496 if (print) { 497 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 498 ierr = MatView(lG,NULL);CHKERRQ(ierr); 499 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 500 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 501 } 502 503 /* Get primal dofs (if any) */ 504 cum = 0; 505 for (i=0;i<ne;i++) { 506 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 507 } 508 if (fl2g) { 509 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 510 } 511 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 512 if (print) { 513 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 514 ierr = ISView(primals,NULL);CHKERRQ(ierr); 515 } 516 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 517 /* TODO: what if the user passed in some of them ? */ 518 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 519 ierr = ISDestroy(&primals);CHKERRQ(ierr); 520 521 /* Compute edge connectivity */ 522 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 523 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 524 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 525 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 526 if (fl2g) { 527 PetscBT btf; 528 PetscInt *iia,*jja,*iiu,*jju; 529 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 530 531 /* create CSR for all local dofs */ 532 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 533 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 534 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 535 iiu = pcbddc->mat_graph->xadj; 536 jju = pcbddc->mat_graph->adjncy; 537 } else if (pcbddc->use_local_adj) { 538 rest = PETSC_TRUE; 539 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 540 } else { 541 free = PETSC_TRUE; 542 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 543 iiu[0] = 0; 544 for (i=0;i<n;i++) { 545 iiu[i+1] = i+1; 546 jju[i] = -1; 547 } 548 } 549 550 /* import sizes of CSR */ 551 iia[0] = 0; 552 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 553 554 /* overwrite entries corresponding to the Nedelec field */ 555 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 556 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 557 for (i=0;i<ne;i++) { 558 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 559 iia[idxs[i]+1] = ii[i+1]-ii[i]; 560 } 561 562 /* iia in CSR */ 563 for (i=0;i<n;i++) iia[i+1] += iia[i]; 564 565 /* jja in CSR */ 566 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 567 for (i=0;i<n;i++) 568 if (!PetscBTLookup(btf,i)) 569 for (j=0;j<iiu[i+1]-iiu[i];j++) 570 jja[iia[i]+j] = jju[iiu[i]+j]; 571 572 /* map edge dofs connectivity */ 573 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 574 for (i=0;i<ne;i++) { 575 PetscInt e = idxs[i]; 576 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 577 } 578 579 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 580 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 581 if (rest) { 582 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 583 } 584 if (free) { 585 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 586 } 587 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 588 } else { 589 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 590 } 591 } 592 593 /* Analyze interface for edge dofs */ 594 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 595 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 596 597 /* Get coarse edges in the edge space */ 598 pcbddc->mat_graph->twodim = PETSC_FALSE; 599 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 600 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 601 602 if (fl2g) { 603 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 604 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 605 for (i=0;i<nee;i++) { 606 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 607 } 608 } else { 609 eedges = alleedges; 610 primals = allprimals; 611 } 612 613 /* Mark fine edge dofs with their coarse edge id */ 614 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 615 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 616 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 617 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 618 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 619 if (print) { 620 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 621 ierr = ISView(primals,NULL);CHKERRQ(ierr); 622 } 623 624 maxsize = 0; 625 for (i=0;i<nee;i++) { 626 PetscInt size,mark = i+1; 627 628 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 629 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 630 for (j=0;j<size;j++) marks[idxs[j]] = mark; 631 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 632 maxsize = PetscMax(maxsize,size); 633 } 634 635 /* Find coarse edge endpoints */ 636 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 637 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 638 for (i=0;i<nee;i++) { 639 PetscInt mark = i+1,size; 640 641 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 642 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 643 if (print) { 644 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 645 ISView(eedges[i],NULL); 646 } 647 for (j=0;j<size;j++) { 648 PetscInt k, ee = idxs[j]; 649 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 650 for (k=ii[ee];k<ii[ee+1];k++) { 651 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 652 if (PetscBTLookup(btv,jj[k])) { 653 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 654 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 655 PetscInt k2; 656 PetscBool corner = PETSC_FALSE; 657 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 658 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 659 /* it's a corner if either is connected with an edge dof belonging to a different cc or 660 if the edge dof lie on the natural part of the boundary */ 661 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 662 corner = PETSC_TRUE; 663 break; 664 } 665 } 666 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 667 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 668 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 669 } else { 670 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 671 } 672 } 673 } 674 } 675 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 676 } 677 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 678 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 679 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 680 681 /* Reset marked primal dofs */ 682 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 683 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 684 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 685 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 686 687 /* Now use the initial lG */ 688 ierr = MatDestroy(&lG);CHKERRQ(ierr); 689 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 690 lG = lGinit; 691 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 692 693 /* Compute extended cols indices */ 694 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 695 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 696 i *= maxsize; 697 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 698 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 699 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 700 eerr = PETSC_FALSE; 701 for (i=0;i<nee;i++) { 702 PetscInt size; 703 704 cum = 0; 705 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 706 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 707 for (j=0;j<size;j++) { 708 PetscInt k,ee = idxs[j]; 709 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 710 } 711 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 712 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 713 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 714 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 715 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 716 /* it may happen that endpoints are not defined at this point 717 if it is the case, mark this edge for a second pass */ 718 if (cum != size -1) { 719 emarks[i] = 1; 720 if (print) { 721 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 722 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 723 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 724 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 725 } 726 eerr = PETSC_TRUE; 727 } 728 } 729 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 730 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 731 if (done) { 732 PetscInt *newprimals; 733 734 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 735 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 736 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 737 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 738 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 739 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 740 for (i=0;i<nee;i++) { 741 if (emarks[i]) { 742 PetscInt size,mark = i+1; 743 744 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 745 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 746 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 747 for (j=0;j<size;j++) { 748 PetscInt k,ee = idxs[j]; 749 for (k=ii[ee];k<ii[ee+1];k++) { 750 /* set all candidates located on the edge as corners */ 751 if (PetscBTLookup(btvcand,jj[k])) { 752 PetscInt k2,vv = jj[k]; 753 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 754 /* set all edge dofs connected to candidate as primals */ 755 for (k2=iit[vv];k2<iit[vv+1];k2++) { 756 if (marks[jjt[k2]] == mark) { 757 PetscInt k3,ee2 = jjt[k2]; 758 newprimals[cum++] = ee2; 759 /* finally set the new corners */ 760 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 761 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 762 } 763 } 764 } 765 } 766 } 767 } 768 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 769 } 770 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 771 } 772 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 773 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 774 if (fl2g) { 775 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 776 ierr = ISDestroy(&primals);CHKERRQ(ierr); 777 for (i=0;i<nee;i++) { 778 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 779 } 780 ierr = PetscFree(eedges);CHKERRQ(ierr); 781 } 782 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 783 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 784 ierr = PetscFree(newprimals);CHKERRQ(ierr); 785 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 786 ierr = ISDestroy(&primals);CHKERRQ(ierr); 787 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 788 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 789 if (fl2g) { 790 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 791 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 792 for (i=0;i<nee;i++) { 793 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 794 } 795 } else { 796 eedges = alleedges; 797 primals = allprimals; 798 } 799 800 /* Mark again */ 801 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 802 for (i=0;i<nee;i++) { 803 PetscInt size,mark = i+1; 804 805 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 806 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 807 for (j=0;j<size;j++) marks[idxs[j]] = mark; 808 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 809 } 810 if (print) { 811 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 812 ierr = ISView(primals,NULL);CHKERRQ(ierr); 813 } 814 815 /* Recompute extended cols */ 816 eerr = PETSC_FALSE; 817 for (i=0;i<nee;i++) { 818 PetscInt size; 819 820 cum = 0; 821 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 822 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 823 for (j=0;j<size;j++) { 824 PetscInt k,ee = idxs[j]; 825 for (k=ii[ee];k<ii[ee+1];k++) { 826 if (!PetscBTLookup(btv,jj[k])) { 827 extrow[cum++] = jj[k]; 828 } 829 } 830 } 831 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 832 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 833 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 834 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 835 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 836 if (cum != size -1) { 837 if (print) { 838 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 839 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 840 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 841 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 842 } 843 eerr = PETSC_TRUE; 844 } 845 } 846 } 847 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 848 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 849 ierr = PetscFree(emarks);CHKERRQ(ierr); 850 /* an error should not occur at this point */ 851 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 852 853 /* Check the number of endpoints */ 854 /* TODO: fix case for circular edge */ 855 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 856 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 857 for (i=0;i<nee;i++) { 858 PetscInt size, found = 0; 859 860 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 861 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 862 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 863 for (j=0;j<size;j++) { 864 PetscInt k,ee = idxs[j]; 865 for (k=ii[ee];k<ii[ee+1];k++) { 866 PetscInt vv = jj[k]; 867 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 868 found++; 869 } 870 } 871 } 872 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 873 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 874 } 875 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 876 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 877 878 #if defined(PETSC_USE_DEBUG) 879 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 880 not interfere with neighbouring coarse edges */ 881 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 883 for (i=0;i<nv;i++) { 884 PetscInt emax = 0,eemax = 0; 885 886 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 887 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 888 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 889 for (j=1;j<nee+1;j++) { 890 if (emax < emarks[j]) { 891 emax = emarks[j]; 892 eemax = j; 893 } 894 } 895 /* not relevant for edges */ 896 if (!eemax) continue; 897 898 for (j=ii[i];j<ii[i+1];j++) { 899 if (marks[jj[j]] && marks[jj[j]] != eemax) { 900 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 901 } 902 } 903 } 904 ierr = PetscFree(emarks);CHKERRQ(ierr); 905 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 906 #endif 907 908 /* Compute extended rows indices for edge blocks of the change of basis */ 909 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 910 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 911 extmem *= maxsize; 912 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 913 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 914 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 915 for (i=0;i<nv;i++) { 916 PetscInt mark = 0,size,start; 917 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 918 for (j=ii[i];j<ii[i+1];j++) 919 if (marks[jj[j]] && !mark) 920 mark = marks[jj[j]]; 921 922 /* not relevant */ 923 if (!mark) continue; 924 925 /* import extended row */ 926 mark--; 927 start = mark*extmem+extrowcum[mark]; 928 size = ii[i+1]-ii[i]; 929 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 930 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 931 extrowcum[mark] += size; 932 } 933 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 934 cum = 0; 935 for (i=0;i<nee;i++) { 936 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 937 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 938 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 939 cum = PetscMax(cum,size); 940 } 941 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 942 ierr = PetscFree(marks);CHKERRQ(ierr); 943 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 944 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 945 946 /* Workspace for lapack inner calls and VecSetValues */ 947 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 948 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 949 for (i=0;i<maxsize;i++) vals[i] = 1.; 950 951 /* Create vectors for quadrature rules */ 952 /* TODO preserve other quadratures */ 953 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 954 for (i=0;i<nquads;i++) { 955 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 956 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 957 } 958 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 959 960 /* Create change of basis matrix (preallocation can be improved) */ 961 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 962 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 963 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 964 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 965 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 966 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 967 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 968 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 969 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 970 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 971 972 /* Defaults to identity */ 973 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 974 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 975 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 976 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 977 978 for (i=0;i<nee;i++) { 979 Mat Gins = NULL, GKins = NULL; 980 981 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 982 if (Gins && GKins) { 983 PetscScalar *data; 984 const PetscInt *rows,*cols; 985 PetscInt nrh,nch,nrc,ncc; 986 987 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 988 /* H1 */ 989 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 990 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 991 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 992 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 993 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 994 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 995 /* complement */ 996 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 997 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 998 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 999 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1000 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1001 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1002 /* Gins kernel quadratures */ 1003 for (j=0;j<ncc;j++) { 1004 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1005 } 1006 /* H1 average */ 1007 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1008 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1009 } 1010 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1011 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1012 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1013 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1014 } 1015 1016 /* Start assembling */ 1017 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1018 for (i=0;i<nquads;i++) { 1019 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1020 } 1021 1022 /* Free */ 1023 if (fl2g) { 1024 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1025 for (i=0;i<nee;i++) { 1026 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1027 } 1028 ierr = PetscFree(eedges);CHKERRQ(ierr); 1029 } 1030 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1031 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1032 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1033 ierr = PetscFree(extrow);CHKERRQ(ierr); 1034 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1035 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1036 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1037 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1038 ierr = PetscFree(vals);CHKERRQ(ierr); 1039 ierr = PetscFree(extrows);CHKERRQ(ierr); 1040 ierr = PetscFree(extcols);CHKERRQ(ierr); 1041 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1042 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1043 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1044 1045 /* Complete assembling */ 1046 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1047 for (i=0;i<nquads;i++) { 1048 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1049 } 1050 for (i=0;i<nquads;i++) { 1051 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1052 } 1053 ierr = PetscFree(quads);CHKERRQ(ierr); 1054 1055 /* set change of basis */ 1056 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1057 ierr = MatDestroy(&T);CHKERRQ(ierr); 1058 1059 /* set quadratures */ 1060 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1061 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1062 1063 PetscFunctionReturn(0); 1064 } 1065 1066 /* the near-null space of BDDC carries information on quadrature weights, 1067 and these can be collinear -> so cheat with MatNullSpaceCreate 1068 and create a suitable set of basis vectors first */ 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1071 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1072 { 1073 PetscErrorCode ierr; 1074 PetscInt i; 1075 1076 PetscFunctionBegin; 1077 for (i=0;i<nvecs;i++) { 1078 PetscInt first,last; 1079 1080 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1081 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1082 if (i>=first && i < last) { 1083 PetscScalar *data; 1084 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1085 if (!has_const) { 1086 data[i-first] = 1.; 1087 } else { 1088 data[2*i-first] = 1./PetscSqrtReal(2.); 1089 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1090 } 1091 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1092 } 1093 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1094 } 1095 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1096 for (i=0;i<nvecs;i++) { /* reset vectors */ 1097 PetscInt first,last; 1098 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1099 if (i>=first && i < last) { 1100 PetscScalar *data; 1101 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1102 if (!has_const) { 1103 data[i-first] = 0.; 1104 } else { 1105 data[2*i-first] = 0.; 1106 data[2*i-first+1] = 0.; 1107 } 1108 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1109 } 1110 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1117 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1118 { 1119 Mat loc_divudotp; 1120 Vec p,v,vins,quad_vec,*quad_vecs; 1121 ISLocalToGlobalMapping map; 1122 IS *faces,*edges; 1123 PetscScalar *vals; 1124 const PetscScalar *array; 1125 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1126 PetscMPIInt rank; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1131 if (graph->twodim) { 1132 lmaxneighs = 2; 1133 } else { 1134 lmaxneighs = 1; 1135 for (i=0;i<ne;i++) { 1136 const PetscInt *idxs; 1137 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1138 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1139 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1140 } 1141 lmaxneighs++; /* graph count does not include self */ 1142 } 1143 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1144 maxsize = 0; 1145 for (i=0;i<ne;i++) { 1146 PetscInt nn; 1147 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1148 maxsize = PetscMax(maxsize,nn); 1149 } 1150 for (i=0;i<nf;i++) { 1151 PetscInt nn; 1152 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1153 maxsize = PetscMax(maxsize,nn); 1154 } 1155 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1156 /* create vectors to hold quadrature weights */ 1157 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1158 if (!transpose) { 1159 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1160 } else { 1161 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1162 } 1163 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1164 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1165 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1166 for (i=0;i<maxneighs;i++) { 1167 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1168 } 1169 1170 /* compute local quad vec */ 1171 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1172 if (!transpose) { 1173 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1174 } else { 1175 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1176 } 1177 ierr = VecSet(p,1.);CHKERRQ(ierr); 1178 if (!transpose) { 1179 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1180 } else { 1181 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1182 } 1183 if (vl2l) { 1184 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1185 } else { 1186 vins = v; 1187 } 1188 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1189 ierr = VecDestroy(&p);CHKERRQ(ierr); 1190 1191 /* insert in global quadrature vecs */ 1192 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1193 for (i=0;i<nf;i++) { 1194 const PetscInt *idxs; 1195 PetscInt idx,nn,j; 1196 1197 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1198 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1199 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1200 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1201 idx = -(idx+1); 1202 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1203 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1204 } 1205 for (i=0;i<ne;i++) { 1206 const PetscInt *idxs; 1207 PetscInt idx,nn,j; 1208 1209 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1210 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1211 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1212 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1213 idx = -(idx+1); 1214 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1215 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1216 } 1217 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1218 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1219 if (vl2l) { 1220 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1221 } 1222 ierr = VecDestroy(&v);CHKERRQ(ierr); 1223 ierr = PetscFree(vals);CHKERRQ(ierr); 1224 1225 /* assemble near null space */ 1226 for (i=0;i<maxneighs;i++) { 1227 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1228 } 1229 for (i=0;i<maxneighs;i++) { 1230 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1231 } 1232 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1239 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1240 { 1241 PetscErrorCode ierr; 1242 Vec local,global; 1243 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1244 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1245 1246 PetscFunctionBegin; 1247 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1248 /* need to convert from global to local topology information and remove references to information in global ordering */ 1249 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1250 if (pcbddc->user_provided_isfordofs) { 1251 if (pcbddc->n_ISForDofs) { 1252 PetscInt i; 1253 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1254 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1255 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1256 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1257 } 1258 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1259 pcbddc->n_ISForDofs = 0; 1260 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1261 } 1262 } else { 1263 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1264 PetscInt i, n = matis->A->rmap->n; 1265 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1266 if (i > 1) { 1267 pcbddc->n_ISForDofsLocal = i; 1268 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1269 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1270 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1271 } 1272 } 1273 } 1274 } 1275 1276 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1277 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1278 } 1279 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1280 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1281 } 1282 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1283 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1284 } 1285 ierr = VecDestroy(&global);CHKERRQ(ierr); 1286 ierr = VecDestroy(&local);CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1292 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1293 { 1294 PC_IS *pcis = (PC_IS*)(pc->data); 1295 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1296 PetscErrorCode ierr; 1297 1298 PetscFunctionBegin; 1299 if (!pcbddc->benign_have_null) { 1300 PetscFunctionReturn(0); 1301 } 1302 if (pcbddc->ChangeOfBasisMatrix) { 1303 Vec swap; 1304 1305 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1306 swap = pcbddc->work_change; 1307 pcbddc->work_change = r; 1308 r = swap; 1309 } 1310 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1311 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1312 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1313 ierr = VecSet(z,0.);CHKERRQ(ierr); 1314 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1315 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1316 if (pcbddc->ChangeOfBasisMatrix) { 1317 Vec swap; 1318 1319 swap = r; 1320 r = pcbddc->work_change; 1321 pcbddc->work_change = swap; 1322 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1323 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1324 } 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1330 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1331 { 1332 PCBDDCBenignMatMult_ctx ctx; 1333 PetscErrorCode ierr; 1334 PetscBool apply_right,apply_left,reset_x; 1335 1336 PetscFunctionBegin; 1337 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1338 if (transpose) { 1339 apply_right = ctx->apply_left; 1340 apply_left = ctx->apply_right; 1341 } else { 1342 apply_right = ctx->apply_right; 1343 apply_left = ctx->apply_left; 1344 } 1345 reset_x = PETSC_FALSE; 1346 if (apply_right) { 1347 const PetscScalar *ax; 1348 PetscInt nl,i; 1349 1350 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1351 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1352 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1353 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1354 for (i=0;i<ctx->benign_n;i++) { 1355 PetscScalar sum,val; 1356 const PetscInt *idxs; 1357 PetscInt nz,j; 1358 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1359 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1360 sum = 0.; 1361 if (ctx->apply_p0) { 1362 val = ctx->work[idxs[nz-1]]; 1363 for (j=0;j<nz-1;j++) { 1364 sum += ctx->work[idxs[j]]; 1365 ctx->work[idxs[j]] += val; 1366 } 1367 } else { 1368 for (j=0;j<nz-1;j++) { 1369 sum += ctx->work[idxs[j]]; 1370 } 1371 } 1372 ctx->work[idxs[nz-1]] -= sum; 1373 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1374 } 1375 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1376 reset_x = PETSC_TRUE; 1377 } 1378 if (transpose) { 1379 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1380 } else { 1381 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1382 } 1383 if (reset_x) { 1384 ierr = VecResetArray(x);CHKERRQ(ierr); 1385 } 1386 if (apply_left) { 1387 PetscScalar *ay; 1388 PetscInt i; 1389 1390 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1391 for (i=0;i<ctx->benign_n;i++) { 1392 PetscScalar sum,val; 1393 const PetscInt *idxs; 1394 PetscInt nz,j; 1395 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1396 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1397 val = -ay[idxs[nz-1]]; 1398 if (ctx->apply_p0) { 1399 sum = 0.; 1400 for (j=0;j<nz-1;j++) { 1401 sum += ay[idxs[j]]; 1402 ay[idxs[j]] += val; 1403 } 1404 ay[idxs[nz-1]] += sum; 1405 } else { 1406 for (j=0;j<nz-1;j++) { 1407 ay[idxs[j]] += val; 1408 } 1409 ay[idxs[nz-1]] = 0.; 1410 } 1411 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1412 } 1413 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1414 } 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1420 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1421 { 1422 PetscErrorCode ierr; 1423 1424 PetscFunctionBegin; 1425 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1431 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1432 { 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "PCBDDCBenignShellMat" 1442 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1443 { 1444 PC_IS *pcis = (PC_IS*)pc->data; 1445 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1446 PCBDDCBenignMatMult_ctx ctx; 1447 PetscErrorCode ierr; 1448 1449 PetscFunctionBegin; 1450 if (!restore) { 1451 Mat A_IB,A_BI; 1452 PetscScalar *work; 1453 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1454 1455 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1456 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1457 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1458 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1459 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1460 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1461 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1462 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1463 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1464 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1465 ctx->apply_left = PETSC_TRUE; 1466 ctx->apply_right = PETSC_FALSE; 1467 ctx->apply_p0 = PETSC_FALSE; 1468 ctx->benign_n = pcbddc->benign_n; 1469 if (reuse) { 1470 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1471 ctx->free = PETSC_FALSE; 1472 } else { /* TODO: could be optimized for successive solves */ 1473 ISLocalToGlobalMapping N_to_D; 1474 PetscInt i; 1475 1476 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1477 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1478 for (i=0;i<pcbddc->benign_n;i++) { 1479 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1480 } 1481 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1482 ctx->free = PETSC_TRUE; 1483 } 1484 ctx->A = pcis->A_IB; 1485 ctx->work = work; 1486 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1487 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1488 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1489 pcis->A_IB = A_IB; 1490 1491 /* A_BI as A_IB^T */ 1492 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1493 pcbddc->benign_original_mat = pcis->A_BI; 1494 pcis->A_BI = A_BI; 1495 } else { 1496 if (!pcbddc->benign_original_mat) { 1497 PetscFunctionReturn(0); 1498 } 1499 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1500 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1501 pcis->A_IB = ctx->A; 1502 ctx->A = NULL; 1503 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1504 pcis->A_BI = pcbddc->benign_original_mat; 1505 pcbddc->benign_original_mat = NULL; 1506 if (ctx->free) { 1507 PetscInt i; 1508 for (i=0;i<ctx->benign_n;i++) { 1509 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1510 } 1511 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1512 } 1513 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1514 ierr = PetscFree(ctx);CHKERRQ(ierr); 1515 } 1516 PetscFunctionReturn(0); 1517 } 1518 1519 /* used just in bddc debug mode */ 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "PCBDDCBenignProject" 1522 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1523 { 1524 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1525 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1526 Mat An; 1527 PetscErrorCode ierr; 1528 1529 PetscFunctionBegin; 1530 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1531 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1532 if (is1) { 1533 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1534 ierr = MatDestroy(&An);CHKERRQ(ierr); 1535 } else { 1536 *B = An; 1537 } 1538 PetscFunctionReturn(0); 1539 } 1540 1541 /* TODO: add reuse flag */ 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatSeqAIJCompress" 1544 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1545 { 1546 Mat Bt; 1547 PetscScalar *a,*bdata; 1548 const PetscInt *ii,*ij; 1549 PetscInt m,n,i,nnz,*bii,*bij; 1550 PetscBool flg_row; 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1555 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1556 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1557 nnz = n; 1558 for (i=0;i<ii[n];i++) { 1559 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1560 } 1561 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1562 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1563 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1564 nnz = 0; 1565 bii[0] = 0; 1566 for (i=0;i<n;i++) { 1567 PetscInt j; 1568 for (j=ii[i];j<ii[i+1];j++) { 1569 PetscScalar entry = a[j]; 1570 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1571 bij[nnz] = ij[j]; 1572 bdata[nnz] = entry; 1573 nnz++; 1574 } 1575 } 1576 bii[i+1] = nnz; 1577 } 1578 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1579 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1580 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1581 { 1582 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1583 b->free_a = PETSC_TRUE; 1584 b->free_ij = PETSC_TRUE; 1585 } 1586 *B = Bt; 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatDetectDisconnectedComponents" 1592 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1593 { 1594 Mat B; 1595 IS is_dummy,*cc_n; 1596 ISLocalToGlobalMapping l2gmap_dummy; 1597 PCBDDCGraph graph; 1598 PetscInt i,n; 1599 PetscInt *xadj,*adjncy; 1600 PetscInt *xadj_filtered,*adjncy_filtered; 1601 PetscBool flg_row,isseqaij; 1602 PetscErrorCode ierr; 1603 1604 PetscFunctionBegin; 1605 if (!A->rmap->N || !A->cmap->N) { 1606 *ncc = 0; 1607 *cc = NULL; 1608 PetscFunctionReturn(0); 1609 } 1610 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1611 if (!isseqaij && filter) { 1612 PetscBool isseqdense; 1613 1614 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1615 if (!isseqdense) { 1616 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1617 } else { /* TODO: rectangular case and LDA */ 1618 PetscScalar *array; 1619 PetscReal chop=1.e-6; 1620 1621 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1622 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1623 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1624 for (i=0;i<n;i++) { 1625 PetscInt j; 1626 for (j=i+1;j<n;j++) { 1627 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1628 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1629 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1630 } 1631 } 1632 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1633 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1634 } 1635 } else { 1636 B = A; 1637 } 1638 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1639 1640 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1641 if (filter) { 1642 PetscScalar *data; 1643 PetscInt j,cum; 1644 1645 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1646 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1647 cum = 0; 1648 for (i=0;i<n;i++) { 1649 PetscInt t; 1650 1651 for (j=xadj[i];j<xadj[i+1];j++) { 1652 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1653 continue; 1654 } 1655 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1656 } 1657 t = xadj_filtered[i]; 1658 xadj_filtered[i] = cum; 1659 cum += t; 1660 } 1661 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1662 } else { 1663 xadj_filtered = NULL; 1664 adjncy_filtered = NULL; 1665 } 1666 1667 /* compute local connected components using PCBDDCGraph */ 1668 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1669 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1670 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1671 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1672 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1673 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1674 if (xadj_filtered) { 1675 graph->xadj = xadj_filtered; 1676 graph->adjncy = adjncy_filtered; 1677 } else { 1678 graph->xadj = xadj; 1679 graph->adjncy = adjncy; 1680 } 1681 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1682 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1683 /* partial clean up */ 1684 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1685 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1686 if (A != B) { 1687 ierr = MatDestroy(&B);CHKERRQ(ierr); 1688 } 1689 1690 /* get back data */ 1691 if (ncc) *ncc = graph->ncc; 1692 if (cc) { 1693 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1694 for (i=0;i<graph->ncc;i++) { 1695 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1696 } 1697 *cc = cc_n; 1698 } 1699 /* clean up graph */ 1700 graph->xadj = 0; 1701 graph->adjncy = 0; 1702 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "PCBDDCBenignCheck" 1708 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1709 { 1710 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1711 PC_IS* pcis = (PC_IS*)(pc->data); 1712 IS dirIS = NULL; 1713 PetscInt i; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1718 if (zerodiag) { 1719 Mat A; 1720 Vec vec3_N; 1721 PetscScalar *vals; 1722 const PetscInt *idxs; 1723 PetscInt nz,*count; 1724 1725 /* p0 */ 1726 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1727 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1728 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1729 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1730 for (i=0;i<nz;i++) vals[i] = 1.; 1731 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1732 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1733 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1734 /* v_I */ 1735 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1736 for (i=0;i<nz;i++) vals[i] = 0.; 1737 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1738 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1739 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1740 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1741 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1742 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1743 if (dirIS) { 1744 PetscInt n; 1745 1746 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1747 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1748 for (i=0;i<n;i++) vals[i] = 0.; 1749 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1750 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1751 } 1752 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1753 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1754 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1755 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1756 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1757 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1758 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1759 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1760 ierr = PetscFree(vals);CHKERRQ(ierr); 1761 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1762 1763 /* there should not be any pressure dofs lying on the interface */ 1764 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1765 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1766 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1767 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1768 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1769 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1770 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1771 ierr = PetscFree(count);CHKERRQ(ierr); 1772 } 1773 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1774 1775 /* check PCBDDCBenignGetOrSetP0 */ 1776 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1777 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1778 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1779 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1780 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1781 for (i=0;i<pcbddc->benign_n;i++) { 1782 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1783 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1784 } 1785 PetscFunctionReturn(0); 1786 } 1787 1788 #undef __FUNCT__ 1789 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1790 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1791 { 1792 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1793 IS pressures,zerodiag,*zerodiag_subs; 1794 PetscInt nz,n; 1795 PetscInt *interior_dofs,n_interior_dofs; 1796 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1801 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1802 for (n=0;n<pcbddc->benign_n;n++) { 1803 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1804 } 1805 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1806 pcbddc->benign_n = 0; 1807 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1808 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1809 Checks if all the pressure dofs in each subdomain have a zero diagonal 1810 If not, a change of basis on pressures is not needed 1811 since the local Schur complements are already SPD 1812 */ 1813 has_null_pressures = PETSC_TRUE; 1814 have_null = PETSC_TRUE; 1815 if (pcbddc->n_ISForDofsLocal) { 1816 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1817 1818 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1819 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1820 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1821 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1822 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1823 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1824 if (!sorted) { 1825 ierr = ISSort(pressures);CHKERRQ(ierr); 1826 } 1827 } else { 1828 pressures = NULL; 1829 } 1830 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1831 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1832 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1833 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1834 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1835 if (!sorted) { 1836 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1837 } 1838 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1839 if (!nz) { 1840 if (n) have_null = PETSC_FALSE; 1841 has_null_pressures = PETSC_FALSE; 1842 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1843 } 1844 recompute_zerodiag = PETSC_FALSE; 1845 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1846 zerodiag_subs = NULL; 1847 pcbddc->benign_n = 0; 1848 n_interior_dofs = 0; 1849 interior_dofs = NULL; 1850 if (pcbddc->current_level) { /* need to compute interior nodes */ 1851 PetscInt n,i,j; 1852 PetscInt n_neigh,*neigh,*n_shared,**shared; 1853 PetscInt *iwork; 1854 1855 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1856 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1857 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1858 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1859 for (i=1;i<n_neigh;i++) 1860 for (j=0;j<n_shared[i];j++) 1861 iwork[shared[i][j]] += 1; 1862 for (i=0;i<n;i++) 1863 if (!iwork[i]) 1864 interior_dofs[n_interior_dofs++] = i; 1865 ierr = PetscFree(iwork);CHKERRQ(ierr); 1866 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1867 } 1868 if (has_null_pressures) { 1869 IS *subs; 1870 PetscInt nsubs,i,j,nl; 1871 const PetscInt *idxs; 1872 PetscScalar *array; 1873 Vec *work; 1874 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1875 1876 subs = pcbddc->local_subs; 1877 nsubs = pcbddc->n_local_subs; 1878 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1879 if (pcbddc->current_level) { 1880 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1881 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1882 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1883 /* work[0] = 1_p */ 1884 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1885 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1886 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1887 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1888 /* work[0] = 1_v */ 1889 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1890 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1891 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1892 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1893 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1894 } 1895 if (nsubs > 1) { 1896 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1897 for (i=0;i<nsubs;i++) { 1898 ISLocalToGlobalMapping l2g; 1899 IS t_zerodiag_subs; 1900 PetscInt nl; 1901 1902 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1903 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1904 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1905 if (nl) { 1906 PetscBool valid = PETSC_TRUE; 1907 1908 if (pcbddc->current_level) { 1909 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1910 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1911 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1912 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1913 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1914 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1915 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1916 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1917 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1918 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1919 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1920 for (j=0;j<n_interior_dofs;j++) { 1921 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1922 valid = PETSC_FALSE; 1923 break; 1924 } 1925 } 1926 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1927 } 1928 if (valid && pcbddc->NeumannBoundariesLocal) { 1929 IS t_bc; 1930 PetscInt nzb; 1931 1932 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1933 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1934 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1935 if (nzb) valid = PETSC_FALSE; 1936 } 1937 if (valid && pressures) { 1938 IS t_pressure_subs; 1939 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1940 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1941 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1942 } 1943 if (valid) { 1944 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1945 pcbddc->benign_n++; 1946 } else { 1947 recompute_zerodiag = PETSC_TRUE; 1948 } 1949 } 1950 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1951 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1952 } 1953 } else { /* there's just one subdomain (or zero if they have not been detected */ 1954 PetscBool valid = PETSC_TRUE; 1955 1956 if (pcbddc->NeumannBoundariesLocal) { 1957 PetscInt nzb; 1958 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1959 if (nzb) valid = PETSC_FALSE; 1960 } 1961 if (valid && pressures) { 1962 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1963 } 1964 if (valid && pcbddc->current_level) { 1965 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1966 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1967 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1968 for (j=0;j<n_interior_dofs;j++) { 1969 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1970 valid = PETSC_FALSE; 1971 break; 1972 } 1973 } 1974 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1975 } 1976 if (valid) { 1977 pcbddc->benign_n = 1; 1978 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1979 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1980 zerodiag_subs[0] = zerodiag; 1981 } 1982 } 1983 if (pcbddc->current_level) { 1984 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1985 } 1986 } 1987 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1988 1989 if (!pcbddc->benign_n) { 1990 PetscInt n; 1991 1992 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1993 recompute_zerodiag = PETSC_FALSE; 1994 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1995 if (n) { 1996 has_null_pressures = PETSC_FALSE; 1997 have_null = PETSC_FALSE; 1998 } 1999 } 2000 2001 /* final check for null pressures */ 2002 if (zerodiag && pressures) { 2003 PetscInt nz,np; 2004 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2005 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2006 if (nz != np) have_null = PETSC_FALSE; 2007 } 2008 2009 if (recompute_zerodiag) { 2010 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2011 if (pcbddc->benign_n == 1) { 2012 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2013 zerodiag = zerodiag_subs[0]; 2014 } else { 2015 PetscInt i,nzn,*new_idxs; 2016 2017 nzn = 0; 2018 for (i=0;i<pcbddc->benign_n;i++) { 2019 PetscInt ns; 2020 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2021 nzn += ns; 2022 } 2023 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2024 nzn = 0; 2025 for (i=0;i<pcbddc->benign_n;i++) { 2026 PetscInt ns,*idxs; 2027 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2028 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2029 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2030 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2031 nzn += ns; 2032 } 2033 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2034 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2035 } 2036 have_null = PETSC_FALSE; 2037 } 2038 2039 /* Prepare matrix to compute no-net-flux */ 2040 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2041 Mat A,loc_divudotp; 2042 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2043 IS row,col,isused = NULL; 2044 PetscInt M,N,n,st,n_isused; 2045 2046 if (pressures) { 2047 isused = pressures; 2048 } else { 2049 isused = zerodiag; 2050 } 2051 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2052 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2053 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2054 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2055 n_isused = 0; 2056 if (isused) { 2057 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2058 } 2059 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2060 st = st-n_isused; 2061 if (n) { 2062 const PetscInt *gidxs; 2063 2064 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2065 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2066 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2067 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2068 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2069 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2070 } else { 2071 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2072 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2073 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2074 } 2075 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2076 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2077 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2078 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2079 ierr = ISDestroy(&row);CHKERRQ(ierr); 2080 ierr = ISDestroy(&col);CHKERRQ(ierr); 2081 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2082 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2083 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2084 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2085 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2086 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2087 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2088 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2089 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2091 } 2092 2093 /* change of basis and p0 dofs */ 2094 if (has_null_pressures) { 2095 IS zerodiagc; 2096 const PetscInt *idxs,*idxsc; 2097 PetscInt i,s,*nnz; 2098 2099 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2100 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2101 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2102 /* local change of basis for pressures */ 2103 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2104 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2105 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2106 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2107 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2108 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2109 for (i=0;i<pcbddc->benign_n;i++) { 2110 PetscInt nzs,j; 2111 2112 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2113 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2114 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2115 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2116 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2117 } 2118 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2119 ierr = PetscFree(nnz);CHKERRQ(ierr); 2120 /* set identity on velocities */ 2121 for (i=0;i<n-nz;i++) { 2122 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2123 } 2124 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2125 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2126 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2127 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2128 /* set change on pressures */ 2129 for (s=0;s<pcbddc->benign_n;s++) { 2130 PetscScalar *array; 2131 PetscInt nzs; 2132 2133 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2134 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2135 for (i=0;i<nzs-1;i++) { 2136 PetscScalar vals[2]; 2137 PetscInt cols[2]; 2138 2139 cols[0] = idxs[i]; 2140 cols[1] = idxs[nzs-1]; 2141 vals[0] = 1.; 2142 vals[1] = 1.; 2143 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2144 } 2145 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2146 for (i=0;i<nzs-1;i++) array[i] = -1.; 2147 array[nzs-1] = 1.; 2148 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2149 /* store local idxs for p0 */ 2150 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2151 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2152 ierr = PetscFree(array);CHKERRQ(ierr); 2153 } 2154 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2155 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 /* project if needed */ 2157 if (pcbddc->benign_change_explicit) { 2158 Mat M; 2159 2160 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2161 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2162 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2163 ierr = MatDestroy(&M);CHKERRQ(ierr); 2164 } 2165 /* store global idxs for p0 */ 2166 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2167 } 2168 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2169 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2170 2171 /* determines if the coarse solver will be singular or not */ 2172 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2173 /* determines if the problem has subdomains with 0 pressure block */ 2174 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2175 *zerodiaglocal = zerodiag; 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2181 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2182 { 2183 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2184 PetscScalar *array; 2185 PetscErrorCode ierr; 2186 2187 PetscFunctionBegin; 2188 if (!pcbddc->benign_sf) { 2189 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2190 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2191 } 2192 if (get) { 2193 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2194 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2195 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2196 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2197 } else { 2198 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2199 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2200 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2201 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2202 } 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2208 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2209 { 2210 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2211 PetscErrorCode ierr; 2212 2213 PetscFunctionBegin; 2214 /* TODO: add error checking 2215 - avoid nested pop (or push) calls. 2216 - cannot push before pop. 2217 - cannot call this if pcbddc->local_mat is NULL 2218 */ 2219 if (!pcbddc->benign_n) { 2220 PetscFunctionReturn(0); 2221 } 2222 if (pop) { 2223 if (pcbddc->benign_change_explicit) { 2224 IS is_p0; 2225 MatReuse reuse; 2226 2227 /* extract B_0 */ 2228 reuse = MAT_INITIAL_MATRIX; 2229 if (pcbddc->benign_B0) { 2230 reuse = MAT_REUSE_MATRIX; 2231 } 2232 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2233 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2234 /* remove rows and cols from local problem */ 2235 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2236 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2237 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2238 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2239 } else { 2240 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2241 PetscScalar *vals; 2242 PetscInt i,n,*idxs_ins; 2243 2244 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2245 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2246 if (!pcbddc->benign_B0) { 2247 PetscInt *nnz; 2248 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2249 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2250 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2251 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2252 for (i=0;i<pcbddc->benign_n;i++) { 2253 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2254 nnz[i] = n - nnz[i]; 2255 } 2256 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz);CHKERRQ(ierr); 2258 } 2259 2260 for (i=0;i<pcbddc->benign_n;i++) { 2261 PetscScalar *array; 2262 PetscInt *idxs,j,nz,cum; 2263 2264 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2265 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2266 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2267 for (j=0;j<nz;j++) vals[j] = 1.; 2268 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2269 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2270 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2271 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2272 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2273 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2274 cum = 0; 2275 for (j=0;j<n;j++) { 2276 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2277 vals[cum] = array[j]; 2278 idxs_ins[cum] = j; 2279 cum++; 2280 } 2281 } 2282 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2283 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2284 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2285 } 2286 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2287 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2288 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2289 } 2290 } else { /* push */ 2291 if (pcbddc->benign_change_explicit) { 2292 PetscInt i; 2293 2294 for (i=0;i<pcbddc->benign_n;i++) { 2295 PetscScalar *B0_vals; 2296 PetscInt *B0_cols,B0_ncol; 2297 2298 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2299 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2300 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2301 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2302 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2303 } 2304 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 } else { 2307 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2308 } 2309 } 2310 PetscFunctionReturn(0); 2311 } 2312 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2315 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2316 { 2317 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2318 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2319 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2320 PetscBLASInt *B_iwork,*B_ifail; 2321 PetscScalar *work,lwork; 2322 PetscScalar *St,*S,*eigv; 2323 PetscScalar *Sarray,*Starray; 2324 PetscReal *eigs,thresh; 2325 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2326 PetscBool allocated_S_St; 2327 #if defined(PETSC_USE_COMPLEX) 2328 PetscReal *rwork; 2329 #endif 2330 PetscErrorCode ierr; 2331 2332 PetscFunctionBegin; 2333 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2334 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2335 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2336 2337 if (pcbddc->dbg_flag) { 2338 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2339 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2340 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2341 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2342 } 2343 2344 if (pcbddc->dbg_flag) { 2345 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2346 } 2347 2348 /* max size of subsets */ 2349 mss = 0; 2350 for (i=0;i<sub_schurs->n_subs;i++) { 2351 PetscInt subset_size; 2352 2353 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2354 mss = PetscMax(mss,subset_size); 2355 } 2356 2357 /* min/max and threshold */ 2358 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2359 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2360 nmax = PetscMax(nmin,nmax); 2361 allocated_S_St = PETSC_FALSE; 2362 if (nmin) { 2363 allocated_S_St = PETSC_TRUE; 2364 } 2365 2366 /* allocate lapack workspace */ 2367 cum = cum2 = 0; 2368 maxneigs = 0; 2369 for (i=0;i<sub_schurs->n_subs;i++) { 2370 PetscInt n,subset_size; 2371 2372 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2373 n = PetscMin(subset_size,nmax); 2374 cum += subset_size; 2375 cum2 += subset_size*n; 2376 maxneigs = PetscMax(maxneigs,n); 2377 } 2378 if (mss) { 2379 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2380 PetscBLASInt B_itype = 1; 2381 PetscBLASInt B_N = mss; 2382 PetscReal zero = 0.0; 2383 PetscReal eps = 0.0; /* dlamch? */ 2384 2385 B_lwork = -1; 2386 S = NULL; 2387 St = NULL; 2388 eigs = NULL; 2389 eigv = NULL; 2390 B_iwork = NULL; 2391 B_ifail = NULL; 2392 #if defined(PETSC_USE_COMPLEX) 2393 rwork = NULL; 2394 #endif 2395 thresh = 1.0; 2396 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2397 #if defined(PETSC_USE_COMPLEX) 2398 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2399 #else 2400 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2401 #endif 2402 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2403 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2404 } else { 2405 /* TODO */ 2406 } 2407 } else { 2408 lwork = 0; 2409 } 2410 2411 nv = 0; 2412 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2413 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2414 } 2415 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2416 if (allocated_S_St) { 2417 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2418 } 2419 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2420 #if defined(PETSC_USE_COMPLEX) 2421 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2422 #endif 2423 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2424 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2425 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2426 nv+cum,&pcbddc->adaptive_constraints_idxs, 2427 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2428 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2429 2430 maxneigs = 0; 2431 cum = cumarray = 0; 2432 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2433 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2434 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2435 const PetscInt *idxs; 2436 2437 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2438 for (cum=0;cum<nv;cum++) { 2439 pcbddc->adaptive_constraints_n[cum] = 1; 2440 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2441 pcbddc->adaptive_constraints_data[cum] = 1.0; 2442 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2443 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2444 } 2445 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2446 } 2447 2448 if (mss) { /* multilevel */ 2449 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2450 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2451 } 2452 2453 thresh = pcbddc->adaptive_threshold; 2454 for (i=0;i<sub_schurs->n_subs;i++) { 2455 const PetscInt *idxs; 2456 PetscReal upper,lower; 2457 PetscInt j,subset_size,eigs_start = 0; 2458 PetscBLASInt B_N; 2459 PetscBool same_data = PETSC_FALSE; 2460 2461 if (pcbddc->use_deluxe_scaling) { 2462 upper = PETSC_MAX_REAL; 2463 lower = thresh; 2464 } else { 2465 upper = 1./thresh; 2466 lower = 0.; 2467 } 2468 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2469 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2470 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2471 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2472 if (sub_schurs->is_hermitian) { 2473 PetscInt j,k; 2474 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2475 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2476 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2477 } 2478 for (j=0;j<subset_size;j++) { 2479 for (k=j;k<subset_size;k++) { 2480 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2481 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2482 } 2483 } 2484 } else { 2485 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2486 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2487 } 2488 } else { 2489 S = Sarray + cumarray; 2490 St = Starray + cumarray; 2491 } 2492 /* see if we can save some work */ 2493 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2494 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2495 } 2496 2497 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2498 B_neigs = 0; 2499 } else { 2500 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2501 PetscBLASInt B_itype = 1; 2502 PetscBLASInt B_IL, B_IU; 2503 PetscReal eps = -1.0; /* dlamch? */ 2504 PetscInt nmin_s; 2505 PetscBool compute_range = PETSC_FALSE; 2506 2507 if (pcbddc->dbg_flag) { 2508 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2509 } 2510 2511 compute_range = PETSC_FALSE; 2512 if (thresh > 1.+PETSC_SMALL && !same_data) { 2513 compute_range = PETSC_TRUE; 2514 } 2515 2516 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2517 if (compute_range) { 2518 2519 /* ask for eigenvalues larger than thresh */ 2520 #if defined(PETSC_USE_COMPLEX) 2521 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2522 #else 2523 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2524 #endif 2525 } else if (!same_data) { 2526 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2527 B_IL = 1; 2528 #if defined(PETSC_USE_COMPLEX) 2529 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2530 #else 2531 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2532 #endif 2533 } else { /* same_data is true, so get the adaptive function requested by the user */ 2534 PetscInt k; 2535 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2536 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2538 nmin = nmax; 2539 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2540 for (k=0;k<nmax;k++) { 2541 eigs[k] = 1./PETSC_SMALL; 2542 eigv[k*(subset_size+1)] = 1.0; 2543 } 2544 } 2545 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2546 if (B_ierr) { 2547 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2548 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2549 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2550 } 2551 2552 if (B_neigs > nmax) { 2553 if (pcbddc->dbg_flag) { 2554 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2555 } 2556 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2557 B_neigs = nmax; 2558 } 2559 2560 nmin_s = PetscMin(nmin,B_N); 2561 if (B_neigs < nmin_s) { 2562 PetscBLASInt B_neigs2; 2563 2564 if (pcbddc->use_deluxe_scaling) { 2565 B_IL = B_N - nmin_s + 1; 2566 B_IU = B_N - B_neigs; 2567 } else { 2568 B_IL = B_neigs + 1; 2569 B_IU = nmin_s; 2570 } 2571 if (pcbddc->dbg_flag) { 2572 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2573 } 2574 if (sub_schurs->is_hermitian) { 2575 PetscInt j,k; 2576 for (j=0;j<subset_size;j++) { 2577 for (k=j;k<subset_size;k++) { 2578 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2579 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2580 } 2581 } 2582 } else { 2583 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2584 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2585 } 2586 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2587 #if defined(PETSC_USE_COMPLEX) 2588 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2589 #else 2590 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2591 #endif 2592 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2593 B_neigs += B_neigs2; 2594 } 2595 if (B_ierr) { 2596 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2597 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2598 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2599 } 2600 if (pcbddc->dbg_flag) { 2601 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2602 for (j=0;j<B_neigs;j++) { 2603 if (eigs[j] == 0.0) { 2604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2605 } else { 2606 if (pcbddc->use_deluxe_scaling) { 2607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2608 } else { 2609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2610 } 2611 } 2612 } 2613 } 2614 } else { 2615 /* TODO */ 2616 } 2617 } 2618 /* change the basis back to the original one */ 2619 if (sub_schurs->change) { 2620 Mat change,phi,phit; 2621 2622 if (pcbddc->dbg_flag > 1) { 2623 PetscInt ii; 2624 for (ii=0;ii<B_neigs;ii++) { 2625 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2626 for (j=0;j<B_N;j++) { 2627 #if defined(PETSC_USE_COMPLEX) 2628 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2629 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2631 #else 2632 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2633 #endif 2634 } 2635 } 2636 } 2637 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2638 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2639 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2640 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2641 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2642 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2643 } 2644 maxneigs = PetscMax(B_neigs,maxneigs); 2645 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2646 if (B_neigs) { 2647 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2648 2649 if (pcbddc->dbg_flag > 1) { 2650 PetscInt ii; 2651 for (ii=0;ii<B_neigs;ii++) { 2652 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2653 for (j=0;j<B_N;j++) { 2654 #if defined(PETSC_USE_COMPLEX) 2655 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2656 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2657 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2658 #else 2659 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2660 #endif 2661 } 2662 } 2663 } 2664 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2665 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2666 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2667 cum++; 2668 } 2669 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2670 /* shift for next computation */ 2671 cumarray += subset_size*subset_size; 2672 } 2673 if (pcbddc->dbg_flag) { 2674 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2675 } 2676 2677 if (mss) { 2678 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2679 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2680 /* destroy matrices (junk) */ 2681 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2682 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2683 } 2684 if (allocated_S_St) { 2685 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2686 } 2687 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2688 #if defined(PETSC_USE_COMPLEX) 2689 ierr = PetscFree(rwork);CHKERRQ(ierr); 2690 #endif 2691 if (pcbddc->dbg_flag) { 2692 PetscInt maxneigs_r; 2693 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2694 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2695 } 2696 PetscFunctionReturn(0); 2697 } 2698 2699 #undef __FUNCT__ 2700 #define __FUNCT__ "PCBDDCSetUpSolvers" 2701 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2702 { 2703 PetscScalar *coarse_submat_vals; 2704 PetscErrorCode ierr; 2705 2706 PetscFunctionBegin; 2707 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2708 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2709 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2710 2711 /* Setup local neumann solver ksp_R */ 2712 /* PCBDDCSetUpLocalScatters should be called first! */ 2713 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2714 2715 /* 2716 Setup local correction and local part of coarse basis. 2717 Gives back the dense local part of the coarse matrix in column major ordering 2718 */ 2719 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2720 2721 /* Compute total number of coarse nodes and setup coarse solver */ 2722 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2723 2724 /* free */ 2725 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2726 PetscFunctionReturn(0); 2727 } 2728 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "PCBDDCResetCustomization" 2731 PetscErrorCode PCBDDCResetCustomization(PC pc) 2732 { 2733 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2734 PetscErrorCode ierr; 2735 2736 PetscFunctionBegin; 2737 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2738 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2739 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2740 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2741 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2742 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2743 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2744 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2745 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2746 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 #undef __FUNCT__ 2751 #define __FUNCT__ "PCBDDCResetTopography" 2752 PetscErrorCode PCBDDCResetTopography(PC pc) 2753 { 2754 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2755 PetscInt i; 2756 PetscErrorCode ierr; 2757 2758 PetscFunctionBegin; 2759 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2760 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2761 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2762 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2763 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2764 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2765 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2766 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2767 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2768 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2769 for (i=0;i<pcbddc->n_local_subs;i++) { 2770 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2771 } 2772 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2773 if (pcbddc->sub_schurs) { 2774 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2775 } 2776 pcbddc->graphanalyzed = PETSC_FALSE; 2777 PetscFunctionReturn(0); 2778 } 2779 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "PCBDDCResetSolvers" 2782 PetscErrorCode PCBDDCResetSolvers(PC pc) 2783 { 2784 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2785 PetscErrorCode ierr; 2786 2787 PetscFunctionBegin; 2788 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2789 if (pcbddc->coarse_phi_B) { 2790 PetscScalar *array; 2791 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2792 ierr = PetscFree(array);CHKERRQ(ierr); 2793 } 2794 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2795 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2796 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2797 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2798 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2799 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2800 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2801 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2802 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2803 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2804 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2805 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2806 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2807 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2808 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2809 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2810 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2811 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2812 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2813 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2814 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2815 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2816 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2817 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2818 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2819 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2820 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2821 if (pcbddc->benign_zerodiag_subs) { 2822 PetscInt i; 2823 for (i=0;i<pcbddc->benign_n;i++) { 2824 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2825 } 2826 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2827 } 2828 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2829 PetscFunctionReturn(0); 2830 } 2831 2832 #undef __FUNCT__ 2833 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2834 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2835 { 2836 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2837 PC_IS *pcis = (PC_IS*)pc->data; 2838 VecType impVecType; 2839 PetscInt n_constraints,n_R,old_size; 2840 PetscErrorCode ierr; 2841 2842 PetscFunctionBegin; 2843 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2844 /* get sizes */ 2845 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2846 n_R = pcis->n - pcbddc->n_vertices; 2847 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2848 /* local work vectors (try to avoid unneeded work)*/ 2849 /* R nodes */ 2850 old_size = -1; 2851 if (pcbddc->vec1_R) { 2852 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2853 } 2854 if (n_R != old_size) { 2855 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2856 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2857 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2858 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2859 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2860 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2861 } 2862 /* local primal dofs */ 2863 old_size = -1; 2864 if (pcbddc->vec1_P) { 2865 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2866 } 2867 if (pcbddc->local_primal_size != old_size) { 2868 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2869 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2870 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2871 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2872 } 2873 /* local explicit constraints */ 2874 old_size = -1; 2875 if (pcbddc->vec1_C) { 2876 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2877 } 2878 if (n_constraints && n_constraints != old_size) { 2879 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2880 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2881 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2882 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2883 } 2884 PetscFunctionReturn(0); 2885 } 2886 2887 #undef __FUNCT__ 2888 #define __FUNCT__ "PCBDDCSetUpCorrection" 2889 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2890 { 2891 PetscErrorCode ierr; 2892 /* pointers to pcis and pcbddc */ 2893 PC_IS* pcis = (PC_IS*)pc->data; 2894 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2895 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2896 /* submatrices of local problem */ 2897 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2898 /* submatrices of local coarse problem */ 2899 Mat S_VV,S_CV,S_VC,S_CC; 2900 /* working matrices */ 2901 Mat C_CR; 2902 /* additional working stuff */ 2903 PC pc_R; 2904 Mat F; 2905 Vec dummy_vec; 2906 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2907 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2908 PetscScalar *work; 2909 PetscInt *idx_V_B; 2910 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2911 PetscInt i,n_R,n_D,n_B; 2912 2913 /* some shortcuts to scalars */ 2914 PetscScalar one=1.0,m_one=-1.0; 2915 2916 PetscFunctionBegin; 2917 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2918 2919 /* Set Non-overlapping dimensions */ 2920 n_vertices = pcbddc->n_vertices; 2921 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2922 n_B = pcis->n_B; 2923 n_D = pcis->n - n_B; 2924 n_R = pcis->n - n_vertices; 2925 2926 /* vertices in boundary numbering */ 2927 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2928 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2929 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2930 2931 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2932 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2933 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2934 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2936 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2937 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2938 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2939 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2940 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2941 2942 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2943 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2944 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2945 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2946 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2947 lda_rhs = n_R; 2948 need_benign_correction = PETSC_FALSE; 2949 if (isLU || isILU || isCHOL) { 2950 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2951 } else if (sub_schurs && sub_schurs->reuse_solver) { 2952 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2953 MatFactorType type; 2954 2955 F = reuse_solver->F; 2956 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2957 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2958 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2959 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2960 } else { 2961 F = NULL; 2962 } 2963 2964 /* allocate workspace */ 2965 n = 0; 2966 if (n_constraints) { 2967 n += lda_rhs*n_constraints; 2968 } 2969 if (n_vertices) { 2970 n = PetscMax(2*lda_rhs*n_vertices,n); 2971 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2972 } 2973 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2974 2975 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2976 dummy_vec = NULL; 2977 if (need_benign_correction && lda_rhs != n_R && F) { 2978 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2979 } 2980 2981 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2982 if (n_constraints) { 2983 Mat M1,M2,M3,C_B; 2984 IS is_aux; 2985 PetscScalar *array,*array2; 2986 2987 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2988 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2989 2990 /* Extract constraints on R nodes: C_{CR} */ 2991 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2992 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2993 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2994 2995 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2996 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2997 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2998 for (i=0;i<n_constraints;i++) { 2999 const PetscScalar *row_cmat_values; 3000 const PetscInt *row_cmat_indices; 3001 PetscInt size_of_constraint,j; 3002 3003 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3004 for (j=0;j<size_of_constraint;j++) { 3005 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3006 } 3007 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3008 } 3009 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3010 if (F) { 3011 Mat B; 3012 3013 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3014 if (need_benign_correction) { 3015 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3016 3017 /* rhs is already zero on interior dofs, no need to change the rhs */ 3018 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3019 } 3020 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3021 if (need_benign_correction) { 3022 PetscScalar *marr; 3023 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3024 3025 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3026 if (lda_rhs != n_R) { 3027 for (i=0;i<n_constraints;i++) { 3028 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3029 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3030 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3031 } 3032 } else { 3033 for (i=0;i<n_constraints;i++) { 3034 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3035 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3036 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3037 } 3038 } 3039 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3040 } 3041 ierr = MatDestroy(&B);CHKERRQ(ierr); 3042 } else { 3043 PetscScalar *marr; 3044 3045 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3046 for (i=0;i<n_constraints;i++) { 3047 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3048 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3049 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3050 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3051 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3052 } 3053 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3054 } 3055 if (!pcbddc->switch_static) { 3056 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3057 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3058 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3059 for (i=0;i<n_constraints;i++) { 3060 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3061 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3062 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3063 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3064 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3065 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3066 } 3067 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3068 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3069 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3070 } else { 3071 if (lda_rhs != n_R) { 3072 IS dummy; 3073 3074 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3075 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3076 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3077 } else { 3078 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3079 pcbddc->local_auxmat2 = local_auxmat2_R; 3080 } 3081 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3082 } 3083 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3084 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3085 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3086 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3087 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3088 if (isCHOL) { 3089 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3090 } else { 3091 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3092 } 3093 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3094 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3095 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3096 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3097 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3098 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3099 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3100 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3101 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3102 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3103 } 3104 3105 /* Get submatrices from subdomain matrix */ 3106 if (n_vertices) { 3107 IS is_aux; 3108 3109 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3110 IS tis; 3111 3112 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3113 ierr = ISSort(tis);CHKERRQ(ierr); 3114 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3115 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3116 } else { 3117 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3118 } 3119 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3120 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3121 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3122 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3123 } 3124 3125 /* Matrix of coarse basis functions (local) */ 3126 if (pcbddc->coarse_phi_B) { 3127 PetscInt on_B,on_primal,on_D=n_D; 3128 if (pcbddc->coarse_phi_D) { 3129 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3130 } 3131 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3132 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3133 PetscScalar *marray; 3134 3135 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3136 ierr = PetscFree(marray);CHKERRQ(ierr); 3137 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3138 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3139 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3140 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3141 } 3142 } 3143 3144 if (!pcbddc->coarse_phi_B) { 3145 PetscScalar *marray; 3146 3147 n = n_B*pcbddc->local_primal_size; 3148 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3149 n += n_D*pcbddc->local_primal_size; 3150 } 3151 if (!pcbddc->symmetric_primal) { 3152 n *= 2; 3153 } 3154 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3156 n = n_B*pcbddc->local_primal_size; 3157 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3158 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3159 n += n_D*pcbddc->local_primal_size; 3160 } 3161 if (!pcbddc->symmetric_primal) { 3162 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3163 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3164 n = n_B*pcbddc->local_primal_size; 3165 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3166 } 3167 } else { 3168 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3169 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3170 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3171 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3172 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3173 } 3174 } 3175 } 3176 3177 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3178 p0_lidx_I = NULL; 3179 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3180 const PetscInt *idxs; 3181 3182 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3183 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3184 for (i=0;i<pcbddc->benign_n;i++) { 3185 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3186 } 3187 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3188 } 3189 3190 /* vertices */ 3191 if (n_vertices) { 3192 3193 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3194 3195 if (n_R) { 3196 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3197 PetscBLASInt B_N,B_one = 1; 3198 PetscScalar *x,*y; 3199 PetscBool isseqaij; 3200 3201 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3202 if (need_benign_correction) { 3203 ISLocalToGlobalMapping RtoN; 3204 IS is_p0; 3205 PetscInt *idxs_p0,n; 3206 3207 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3208 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3209 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3210 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3211 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3212 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3213 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3214 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3215 } 3216 3217 if (lda_rhs == n_R) { 3218 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3219 } else { 3220 PetscScalar *av,*array; 3221 const PetscInt *xadj,*adjncy; 3222 PetscInt n; 3223 PetscBool flg_row; 3224 3225 array = work+lda_rhs*n_vertices; 3226 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3227 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3228 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3229 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3230 for (i=0;i<n;i++) { 3231 PetscInt j; 3232 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3233 } 3234 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3235 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3236 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3237 } 3238 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3239 if (need_benign_correction) { 3240 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3241 PetscScalar *marr; 3242 3243 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3244 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3245 3246 | 0 0 0 | (V) 3247 L = | 0 0 -1 | (P-p0) 3248 | 0 0 -1 | (p0) 3249 3250 */ 3251 for (i=0;i<reuse_solver->benign_n;i++) { 3252 const PetscScalar *vals; 3253 const PetscInt *idxs,*idxs_zero; 3254 PetscInt n,j,nz; 3255 3256 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3257 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3258 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3259 for (j=0;j<n;j++) { 3260 PetscScalar val = vals[j]; 3261 PetscInt k,col = idxs[j]; 3262 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3263 } 3264 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3265 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3266 } 3267 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3268 } 3269 if (F) { 3270 /* need to correct the rhs */ 3271 if (need_benign_correction) { 3272 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3273 PetscScalar *marr; 3274 3275 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3276 if (lda_rhs != n_R) { 3277 for (i=0;i<n_vertices;i++) { 3278 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3279 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3280 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3281 } 3282 } else { 3283 for (i=0;i<n_vertices;i++) { 3284 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3285 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3286 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3287 } 3288 } 3289 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3290 } 3291 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3292 /* need to correct the solution */ 3293 if (need_benign_correction) { 3294 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3295 PetscScalar *marr; 3296 3297 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3298 if (lda_rhs != n_R) { 3299 for (i=0;i<n_vertices;i++) { 3300 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3301 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3302 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3303 } 3304 } else { 3305 for (i=0;i<n_vertices;i++) { 3306 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3307 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3308 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3309 } 3310 } 3311 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3312 } 3313 } else { 3314 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3315 for (i=0;i<n_vertices;i++) { 3316 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3317 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3318 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3319 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3320 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3321 } 3322 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3323 } 3324 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3325 /* S_VV and S_CV */ 3326 if (n_constraints) { 3327 Mat B; 3328 3329 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3330 for (i=0;i<n_vertices;i++) { 3331 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3332 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3333 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3334 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3335 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3336 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3337 } 3338 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3339 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3340 ierr = MatDestroy(&B);CHKERRQ(ierr); 3341 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3342 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3343 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3344 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3345 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3346 ierr = MatDestroy(&B);CHKERRQ(ierr); 3347 } 3348 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3349 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3350 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3351 } 3352 if (lda_rhs != n_R) { 3353 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3354 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3355 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3356 } 3357 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3358 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3359 if (need_benign_correction) { 3360 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3361 PetscScalar *marr,*sums; 3362 3363 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3364 ierr = MatDenseGetArray(S_VVt,&marr); 3365 for (i=0;i<reuse_solver->benign_n;i++) { 3366 const PetscScalar *vals; 3367 const PetscInt *idxs,*idxs_zero; 3368 PetscInt n,j,nz; 3369 3370 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3371 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3372 for (j=0;j<n_vertices;j++) { 3373 PetscInt k; 3374 sums[j] = 0.; 3375 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3376 } 3377 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3378 for (j=0;j<n;j++) { 3379 PetscScalar val = vals[j]; 3380 PetscInt k; 3381 for (k=0;k<n_vertices;k++) { 3382 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3383 } 3384 } 3385 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3386 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3387 } 3388 ierr = PetscFree(sums);CHKERRQ(ierr); 3389 ierr = MatDenseRestoreArray(S_VVt,&marr); 3390 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3391 } 3392 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3393 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3394 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3395 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3396 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3397 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3398 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3399 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3400 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3401 } else { 3402 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3403 } 3404 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3405 3406 /* coarse basis functions */ 3407 for (i=0;i<n_vertices;i++) { 3408 PetscScalar *y; 3409 3410 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3411 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3412 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3413 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3414 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3415 y[n_B*i+idx_V_B[i]] = 1.0; 3416 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3417 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3418 3419 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3420 PetscInt j; 3421 3422 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3423 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3424 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3425 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3426 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3427 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3428 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3429 } 3430 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3431 } 3432 /* if n_R == 0 the object is not destroyed */ 3433 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3434 } 3435 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3436 3437 if (n_constraints) { 3438 Mat B; 3439 3440 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3441 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3442 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3443 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3444 if (n_vertices) { 3445 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3446 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3447 } else { 3448 Mat S_VCt; 3449 3450 if (lda_rhs != n_R) { 3451 ierr = MatDestroy(&B);CHKERRQ(ierr); 3452 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3453 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3454 } 3455 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3456 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3457 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3458 } 3459 } 3460 ierr = MatDestroy(&B);CHKERRQ(ierr); 3461 /* coarse basis functions */ 3462 for (i=0;i<n_constraints;i++) { 3463 PetscScalar *y; 3464 3465 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3466 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3467 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3468 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3469 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3470 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3471 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3472 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3473 PetscInt j; 3474 3475 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3476 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3477 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3478 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3479 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3480 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3481 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3482 } 3483 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3484 } 3485 } 3486 if (n_constraints) { 3487 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3488 } 3489 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3490 3491 /* coarse matrix entries relative to B_0 */ 3492 if (pcbddc->benign_n) { 3493 Mat B0_B,B0_BPHI; 3494 IS is_dummy; 3495 PetscScalar *data; 3496 PetscInt j; 3497 3498 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3499 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3500 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3501 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3502 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3503 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3504 for (j=0;j<pcbddc->benign_n;j++) { 3505 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3506 for (i=0;i<pcbddc->local_primal_size;i++) { 3507 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3508 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3509 } 3510 } 3511 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3512 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3513 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3514 } 3515 3516 /* compute other basis functions for non-symmetric problems */ 3517 if (!pcbddc->symmetric_primal) { 3518 Mat B_V=NULL,B_C=NULL; 3519 PetscScalar *marray; 3520 3521 if (n_constraints) { 3522 Mat S_CCT,C_CRT; 3523 3524 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3525 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3526 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3527 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3528 if (n_vertices) { 3529 Mat S_VCT; 3530 3531 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3532 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3533 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3534 } 3535 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3536 } else { 3537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3538 } 3539 if (n_vertices && n_R) { 3540 PetscScalar *av,*marray; 3541 const PetscInt *xadj,*adjncy; 3542 PetscInt n; 3543 PetscBool flg_row; 3544 3545 /* B_V = B_V - A_VR^T */ 3546 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3547 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3548 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3549 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3550 for (i=0;i<n;i++) { 3551 PetscInt j; 3552 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3553 } 3554 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3555 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3556 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3557 } 3558 3559 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3560 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3561 for (i=0;i<n_vertices;i++) { 3562 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3563 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3564 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3565 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3566 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3567 } 3568 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3569 if (B_C) { 3570 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3571 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3572 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3573 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3574 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3575 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3576 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3577 } 3578 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3579 } 3580 /* coarse basis functions */ 3581 for (i=0;i<pcbddc->local_primal_size;i++) { 3582 PetscScalar *y; 3583 3584 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3585 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3586 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3587 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3588 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3589 if (i<n_vertices) { 3590 y[n_B*i+idx_V_B[i]] = 1.0; 3591 } 3592 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3593 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3594 3595 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3596 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3597 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3598 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3599 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3600 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3601 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3602 } 3603 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3604 } 3605 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3606 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3607 } 3608 /* free memory */ 3609 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3610 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3611 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3612 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3613 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3614 ierr = PetscFree(work);CHKERRQ(ierr); 3615 if (n_vertices) { 3616 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3617 } 3618 if (n_constraints) { 3619 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3620 } 3621 /* Checking coarse_sub_mat and coarse basis functios */ 3622 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3623 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3624 if (pcbddc->dbg_flag) { 3625 Mat coarse_sub_mat; 3626 Mat AUXMAT,TM1,TM2,TM3,TM4; 3627 Mat coarse_phi_D,coarse_phi_B; 3628 Mat coarse_psi_D,coarse_psi_B; 3629 Mat A_II,A_BB,A_IB,A_BI; 3630 Mat C_B,CPHI; 3631 IS is_dummy; 3632 Vec mones; 3633 MatType checkmattype=MATSEQAIJ; 3634 PetscReal real_value; 3635 3636 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3637 Mat A; 3638 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3639 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3640 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3641 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3642 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3643 ierr = MatDestroy(&A);CHKERRQ(ierr); 3644 } else { 3645 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3646 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3647 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3648 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3649 } 3650 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3651 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3652 if (!pcbddc->symmetric_primal) { 3653 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3654 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3655 } 3656 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3657 3658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3660 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3661 if (!pcbddc->symmetric_primal) { 3662 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3663 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3664 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3665 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3666 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3667 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3668 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3669 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3671 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3672 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3674 } else { 3675 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3676 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3677 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3678 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3679 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3680 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3681 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3682 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3683 } 3684 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3685 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3686 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3687 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3688 if (pcbddc->benign_n) { 3689 Mat B0_B,B0_BPHI; 3690 PetscScalar *data,*data2; 3691 PetscInt j; 3692 3693 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3694 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3695 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3696 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3697 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3698 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3699 for (j=0;j<pcbddc->benign_n;j++) { 3700 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3701 for (i=0;i<pcbddc->local_primal_size;i++) { 3702 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3703 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3704 } 3705 } 3706 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3707 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3708 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3709 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3710 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3711 } 3712 #if 0 3713 { 3714 PetscViewer viewer; 3715 char filename[256]; 3716 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3717 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3718 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3719 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3720 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3721 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3722 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3723 if (save_change) { 3724 Mat phi_B; 3725 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3726 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3727 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3728 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3729 } else { 3730 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3731 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3732 } 3733 if (pcbddc->coarse_phi_D) { 3734 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3735 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3736 } 3737 if (pcbddc->coarse_psi_B) { 3738 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3739 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3740 } 3741 if (pcbddc->coarse_psi_D) { 3742 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3743 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3744 } 3745 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3746 } 3747 #endif 3748 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3749 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3750 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3751 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3752 3753 /* check constraints */ 3754 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3755 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3756 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3757 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3758 } else { 3759 PetscScalar *data; 3760 Mat tmat; 3761 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3762 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3763 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3764 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3765 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3766 } 3767 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3768 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3769 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3770 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3771 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3772 if (!pcbddc->symmetric_primal) { 3773 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3774 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3775 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3776 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3777 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3778 } 3779 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3780 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3781 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3782 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3783 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3784 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3785 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3786 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3787 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3788 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3789 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3790 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3791 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3792 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3793 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3794 if (!pcbddc->symmetric_primal) { 3795 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3796 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3797 } 3798 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3799 } 3800 /* get back data */ 3801 *coarse_submat_vals_n = coarse_submat_vals; 3802 PetscFunctionReturn(0); 3803 } 3804 3805 #undef __FUNCT__ 3806 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3807 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3808 { 3809 Mat *work_mat; 3810 IS isrow_s,iscol_s; 3811 PetscBool rsorted,csorted; 3812 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3813 PetscErrorCode ierr; 3814 3815 PetscFunctionBegin; 3816 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3817 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3818 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3819 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3820 3821 if (!rsorted) { 3822 const PetscInt *idxs; 3823 PetscInt *idxs_sorted,i; 3824 3825 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3826 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3827 for (i=0;i<rsize;i++) { 3828 idxs_perm_r[i] = i; 3829 } 3830 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3831 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3832 for (i=0;i<rsize;i++) { 3833 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3834 } 3835 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3836 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3837 } else { 3838 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3839 isrow_s = isrow; 3840 } 3841 3842 if (!csorted) { 3843 if (isrow == iscol) { 3844 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3845 iscol_s = isrow_s; 3846 } else { 3847 const PetscInt *idxs; 3848 PetscInt *idxs_sorted,i; 3849 3850 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3851 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3852 for (i=0;i<csize;i++) { 3853 idxs_perm_c[i] = i; 3854 } 3855 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3856 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3857 for (i=0;i<csize;i++) { 3858 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3859 } 3860 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3861 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3862 } 3863 } else { 3864 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3865 iscol_s = iscol; 3866 } 3867 3868 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3869 3870 if (!rsorted || !csorted) { 3871 Mat new_mat; 3872 IS is_perm_r,is_perm_c; 3873 3874 if (!rsorted) { 3875 PetscInt *idxs_r,i; 3876 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3877 for (i=0;i<rsize;i++) { 3878 idxs_r[idxs_perm_r[i]] = i; 3879 } 3880 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3881 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3882 } else { 3883 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3884 } 3885 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3886 3887 if (!csorted) { 3888 if (isrow_s == iscol_s) { 3889 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3890 is_perm_c = is_perm_r; 3891 } else { 3892 PetscInt *idxs_c,i; 3893 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3894 for (i=0;i<csize;i++) { 3895 idxs_c[idxs_perm_c[i]] = i; 3896 } 3897 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3898 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3899 } 3900 } else { 3901 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3902 } 3903 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3904 3905 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3906 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3907 work_mat[0] = new_mat; 3908 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3909 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3910 } 3911 3912 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3913 *B = work_mat[0]; 3914 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3915 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3916 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3917 PetscFunctionReturn(0); 3918 } 3919 3920 #undef __FUNCT__ 3921 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3922 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3923 { 3924 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3925 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3926 Mat new_mat; 3927 IS is_local,is_global; 3928 PetscInt local_size; 3929 PetscBool isseqaij; 3930 PetscErrorCode ierr; 3931 3932 PetscFunctionBegin; 3933 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3934 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3935 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3936 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3937 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3938 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3939 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3940 3941 /* check */ 3942 if (pcbddc->dbg_flag) { 3943 Vec x,x_change; 3944 PetscReal error; 3945 3946 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3947 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3948 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3949 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3950 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3951 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3952 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3953 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3954 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3955 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3957 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3958 ierr = VecDestroy(&x);CHKERRQ(ierr); 3959 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3960 } 3961 3962 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3963 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3964 if (isseqaij) { 3965 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3966 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3967 } else { 3968 Mat work_mat; 3969 3970 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3971 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3972 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3973 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3974 } 3975 if (matis->A->symmetric_set) { 3976 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3977 #if !defined(PETSC_USE_COMPLEX) 3978 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3979 #endif 3980 } 3981 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3982 PetscFunctionReturn(0); 3983 } 3984 3985 #undef __FUNCT__ 3986 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3987 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3988 { 3989 PC_IS* pcis = (PC_IS*)(pc->data); 3990 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3991 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3992 PetscInt *idx_R_local=NULL; 3993 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3994 PetscInt vbs,bs; 3995 PetscBT bitmask=NULL; 3996 PetscErrorCode ierr; 3997 3998 PetscFunctionBegin; 3999 /* 4000 No need to setup local scatters if 4001 - primal space is unchanged 4002 AND 4003 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4004 AND 4005 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4006 */ 4007 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4008 PetscFunctionReturn(0); 4009 } 4010 /* destroy old objects */ 4011 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4012 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4013 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4014 /* Set Non-overlapping dimensions */ 4015 n_B = pcis->n_B; 4016 n_D = pcis->n - n_B; 4017 n_vertices = pcbddc->n_vertices; 4018 4019 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4020 4021 /* create auxiliary bitmask and allocate workspace */ 4022 if (!sub_schurs || !sub_schurs->reuse_solver) { 4023 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4024 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4025 for (i=0;i<n_vertices;i++) { 4026 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4027 } 4028 4029 for (i=0, n_R=0; i<pcis->n; i++) { 4030 if (!PetscBTLookup(bitmask,i)) { 4031 idx_R_local[n_R++] = i; 4032 } 4033 } 4034 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4035 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4036 4037 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4038 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4039 } 4040 4041 /* Block code */ 4042 vbs = 1; 4043 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4044 if (bs>1 && !(n_vertices%bs)) { 4045 PetscBool is_blocked = PETSC_TRUE; 4046 PetscInt *vary; 4047 if (!sub_schurs || !sub_schurs->reuse_solver) { 4048 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4049 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4050 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4051 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4052 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4053 for (i=0; i<pcis->n/bs; i++) { 4054 if (vary[i]!=0 && vary[i]!=bs) { 4055 is_blocked = PETSC_FALSE; 4056 break; 4057 } 4058 } 4059 ierr = PetscFree(vary);CHKERRQ(ierr); 4060 } else { 4061 /* Verify directly the R set */ 4062 for (i=0; i<n_R/bs; i++) { 4063 PetscInt j,node=idx_R_local[bs*i]; 4064 for (j=1; j<bs; j++) { 4065 if (node != idx_R_local[bs*i+j]-j) { 4066 is_blocked = PETSC_FALSE; 4067 break; 4068 } 4069 } 4070 } 4071 } 4072 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4073 vbs = bs; 4074 for (i=0;i<n_R/vbs;i++) { 4075 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4076 } 4077 } 4078 } 4079 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4080 if (sub_schurs && sub_schurs->reuse_solver) { 4081 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4082 4083 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4084 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4085 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4086 reuse_solver->is_R = pcbddc->is_R_local; 4087 } else { 4088 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4089 } 4090 4091 /* print some info if requested */ 4092 if (pcbddc->dbg_flag) { 4093 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4094 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4095 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4096 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4098 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4099 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4100 } 4101 4102 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4103 if (!sub_schurs || !sub_schurs->reuse_solver) { 4104 IS is_aux1,is_aux2; 4105 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4106 4107 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4108 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4109 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4110 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4111 for (i=0; i<n_D; i++) { 4112 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4113 } 4114 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4115 for (i=0, j=0; i<n_R; i++) { 4116 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4117 aux_array1[j++] = i; 4118 } 4119 } 4120 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4121 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4122 for (i=0, j=0; i<n_B; i++) { 4123 if (!PetscBTLookup(bitmask,is_indices[i])) { 4124 aux_array2[j++] = i; 4125 } 4126 } 4127 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4128 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4129 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4130 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4131 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4132 4133 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4134 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4135 for (i=0, j=0; i<n_R; i++) { 4136 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4137 aux_array1[j++] = i; 4138 } 4139 } 4140 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4141 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4142 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4143 } 4144 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4145 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4146 } else { 4147 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4148 IS tis; 4149 PetscInt schur_size; 4150 4151 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4152 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4153 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4154 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4155 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4156 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4157 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4158 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4159 } 4160 } 4161 PetscFunctionReturn(0); 4162 } 4163 4164 4165 #undef __FUNCT__ 4166 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4167 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4168 { 4169 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4170 PC_IS *pcis = (PC_IS*)pc->data; 4171 PC pc_temp; 4172 Mat A_RR; 4173 MatReuse reuse; 4174 PetscScalar m_one = -1.0; 4175 PetscReal value; 4176 PetscInt n_D,n_R; 4177 PetscBool check_corr[2],issbaij; 4178 PetscErrorCode ierr; 4179 /* prefixes stuff */ 4180 char dir_prefix[256],neu_prefix[256],str_level[16]; 4181 size_t len; 4182 4183 PetscFunctionBegin; 4184 4185 /* compute prefixes */ 4186 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4187 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4188 if (!pcbddc->current_level) { 4189 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4190 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4191 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4192 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4193 } else { 4194 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4195 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4196 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4197 len -= 15; /* remove "pc_bddc_coarse_" */ 4198 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4199 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4200 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4201 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4202 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4203 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4204 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4205 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4206 } 4207 4208 /* DIRICHLET PROBLEM */ 4209 if (dirichlet) { 4210 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4211 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4212 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4213 if (pcbddc->dbg_flag) { 4214 Mat A_IIn; 4215 4216 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4217 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4218 pcis->A_II = A_IIn; 4219 } 4220 } 4221 if (pcbddc->local_mat->symmetric_set) { 4222 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4223 } 4224 /* Matrix for Dirichlet problem is pcis->A_II */ 4225 n_D = pcis->n - pcis->n_B; 4226 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4227 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4228 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4229 /* default */ 4230 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4231 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4232 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4233 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4234 if (issbaij) { 4235 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4236 } else { 4237 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4238 } 4239 /* Allow user's customization */ 4240 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4241 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4242 } 4243 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4244 if (sub_schurs && sub_schurs->reuse_solver) { 4245 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4246 4247 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4248 } 4249 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4250 if (!n_D) { 4251 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4252 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4253 } 4254 /* Set Up KSP for Dirichlet problem of BDDC */ 4255 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4256 /* set ksp_D into pcis data */ 4257 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4258 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4259 pcis->ksp_D = pcbddc->ksp_D; 4260 } 4261 4262 /* NEUMANN PROBLEM */ 4263 A_RR = 0; 4264 if (neumann) { 4265 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4266 PetscInt ibs,mbs; 4267 PetscBool issbaij; 4268 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4269 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4270 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4271 if (pcbddc->ksp_R) { /* already created ksp */ 4272 PetscInt nn_R; 4273 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4274 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4275 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4276 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4277 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4278 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4279 reuse = MAT_INITIAL_MATRIX; 4280 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4281 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4282 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4283 reuse = MAT_INITIAL_MATRIX; 4284 } else { /* safe to reuse the matrix */ 4285 reuse = MAT_REUSE_MATRIX; 4286 } 4287 } 4288 /* last check */ 4289 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4290 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4291 reuse = MAT_INITIAL_MATRIX; 4292 } 4293 } else { /* first time, so we need to create the matrix */ 4294 reuse = MAT_INITIAL_MATRIX; 4295 } 4296 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4297 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4298 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4299 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4300 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4301 if (matis->A == pcbddc->local_mat) { 4302 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4303 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4304 } else { 4305 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4306 } 4307 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4308 if (matis->A == pcbddc->local_mat) { 4309 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4310 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4311 } else { 4312 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4313 } 4314 } 4315 /* extract A_RR */ 4316 if (sub_schurs && sub_schurs->reuse_solver) { 4317 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4318 4319 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4320 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4321 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4322 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4323 } else { 4324 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4325 } 4326 } else { 4327 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4328 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4329 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4330 } 4331 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4332 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4333 } 4334 if (pcbddc->local_mat->symmetric_set) { 4335 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4336 } 4337 if (!pcbddc->ksp_R) { /* create object if not present */ 4338 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4339 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4340 /* default */ 4341 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4342 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4343 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4344 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4345 if (issbaij) { 4346 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4347 } else { 4348 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4349 } 4350 /* Allow user's customization */ 4351 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4352 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4353 } 4354 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4355 if (!n_R) { 4356 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4357 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4358 } 4359 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4360 /* Reuse solver if it is present */ 4361 if (sub_schurs && sub_schurs->reuse_solver) { 4362 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4363 4364 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4365 } 4366 /* Set Up KSP for Neumann problem of BDDC */ 4367 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4368 } 4369 4370 if (pcbddc->dbg_flag) { 4371 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4372 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4373 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4374 } 4375 4376 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4377 check_corr[0] = check_corr[1] = PETSC_FALSE; 4378 if (pcbddc->NullSpace_corr[0]) { 4379 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4380 } 4381 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4382 check_corr[0] = PETSC_TRUE; 4383 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4384 } 4385 if (neumann && pcbddc->NullSpace_corr[2]) { 4386 check_corr[1] = PETSC_TRUE; 4387 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4388 } 4389 4390 /* check Dirichlet and Neumann solvers */ 4391 if (pcbddc->dbg_flag) { 4392 if (dirichlet) { /* Dirichlet */ 4393 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4394 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4395 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4396 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4397 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4398 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4399 if (check_corr[0]) { 4400 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4401 } 4402 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4403 } 4404 if (neumann) { /* Neumann */ 4405 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4406 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4407 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4408 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4409 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4411 if (check_corr[1]) { 4412 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4413 } 4414 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4415 } 4416 } 4417 /* free Neumann problem's matrix */ 4418 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4419 PetscFunctionReturn(0); 4420 } 4421 4422 #undef __FUNCT__ 4423 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4424 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4425 { 4426 PetscErrorCode ierr; 4427 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4428 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4429 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4430 4431 PetscFunctionBegin; 4432 if (!reuse_solver) { 4433 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4434 } 4435 if (!pcbddc->switch_static) { 4436 if (applytranspose && pcbddc->local_auxmat1) { 4437 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4438 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4439 } 4440 if (!reuse_solver) { 4441 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4442 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4443 } else { 4444 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4445 4446 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4447 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4448 } 4449 } else { 4450 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4451 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4452 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4453 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4454 if (applytranspose && pcbddc->local_auxmat1) { 4455 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4456 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4457 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4458 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4459 } 4460 } 4461 if (!reuse_solver || pcbddc->switch_static) { 4462 if (applytranspose) { 4463 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4464 } else { 4465 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4466 } 4467 } else { 4468 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4469 4470 if (applytranspose) { 4471 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4472 } else { 4473 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4474 } 4475 } 4476 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4477 if (!pcbddc->switch_static) { 4478 if (!reuse_solver) { 4479 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4480 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4481 } else { 4482 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4483 4484 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4485 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4486 } 4487 if (!applytranspose && pcbddc->local_auxmat1) { 4488 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4489 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4490 } 4491 } else { 4492 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4493 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4494 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4495 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4496 if (!applytranspose && pcbddc->local_auxmat1) { 4497 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4498 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4499 } 4500 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4501 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4502 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4503 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4504 } 4505 PetscFunctionReturn(0); 4506 } 4507 4508 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4509 #undef __FUNCT__ 4510 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4511 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4512 { 4513 PetscErrorCode ierr; 4514 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4515 PC_IS* pcis = (PC_IS*) (pc->data); 4516 const PetscScalar zero = 0.0; 4517 4518 PetscFunctionBegin; 4519 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4520 if (!pcbddc->benign_apply_coarse_only) { 4521 if (applytranspose) { 4522 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4523 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4524 } else { 4525 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4526 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4527 } 4528 } else { 4529 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4530 } 4531 4532 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4533 if (pcbddc->benign_n) { 4534 PetscScalar *array; 4535 PetscInt j; 4536 4537 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4538 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4539 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4540 } 4541 4542 /* start communications from local primal nodes to rhs of coarse solver */ 4543 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4544 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4545 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4546 4547 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4548 if (pcbddc->coarse_ksp) { 4549 Mat coarse_mat; 4550 Vec rhs,sol; 4551 MatNullSpace nullsp; 4552 PetscBool isbddc = PETSC_FALSE; 4553 4554 if (pcbddc->benign_have_null) { 4555 PC coarse_pc; 4556 4557 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4558 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4559 /* we need to propagate to coarser levels the need for a possible benign correction */ 4560 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4561 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4562 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4563 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4564 } 4565 } 4566 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4567 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4568 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4569 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4570 if (nullsp) { 4571 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4572 } 4573 if (applytranspose) { 4574 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4575 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4576 } else { 4577 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4578 PC coarse_pc; 4579 4580 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4581 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4582 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4583 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4584 } else { 4585 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4586 } 4587 } 4588 /* we don't need the benign correction at coarser levels anymore */ 4589 if (pcbddc->benign_have_null && isbddc) { 4590 PC coarse_pc; 4591 PC_BDDC* coarsepcbddc; 4592 4593 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4594 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4595 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4596 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4597 } 4598 if (nullsp) { 4599 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4600 } 4601 } 4602 4603 /* Local solution on R nodes */ 4604 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4605 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4606 } 4607 /* communications from coarse sol to local primal nodes */ 4608 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4609 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4610 4611 /* Sum contributions from the two levels */ 4612 if (!pcbddc->benign_apply_coarse_only) { 4613 if (applytranspose) { 4614 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4615 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4616 } else { 4617 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4618 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4619 } 4620 /* store p0 */ 4621 if (pcbddc->benign_n) { 4622 PetscScalar *array; 4623 PetscInt j; 4624 4625 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4626 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4627 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4628 } 4629 } else { /* expand the coarse solution */ 4630 if (applytranspose) { 4631 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4632 } else { 4633 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4634 } 4635 } 4636 PetscFunctionReturn(0); 4637 } 4638 4639 #undef __FUNCT__ 4640 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4641 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4642 { 4643 PetscErrorCode ierr; 4644 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4645 PetscScalar *array; 4646 Vec from,to; 4647 4648 PetscFunctionBegin; 4649 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4650 from = pcbddc->coarse_vec; 4651 to = pcbddc->vec1_P; 4652 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4653 Vec tvec; 4654 4655 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4656 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4657 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4658 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4659 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4660 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4661 } 4662 } else { /* from local to global -> put data in coarse right hand side */ 4663 from = pcbddc->vec1_P; 4664 to = pcbddc->coarse_vec; 4665 } 4666 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4667 PetscFunctionReturn(0); 4668 } 4669 4670 #undef __FUNCT__ 4671 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4672 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4673 { 4674 PetscErrorCode ierr; 4675 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4676 PetscScalar *array; 4677 Vec from,to; 4678 4679 PetscFunctionBegin; 4680 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4681 from = pcbddc->coarse_vec; 4682 to = pcbddc->vec1_P; 4683 } else { /* from local to global -> put data in coarse right hand side */ 4684 from = pcbddc->vec1_P; 4685 to = pcbddc->coarse_vec; 4686 } 4687 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4688 if (smode == SCATTER_FORWARD) { 4689 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4690 Vec tvec; 4691 4692 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4693 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4694 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4695 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4696 } 4697 } else { 4698 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4699 ierr = VecResetArray(from);CHKERRQ(ierr); 4700 } 4701 } 4702 PetscFunctionReturn(0); 4703 } 4704 4705 /* uncomment for testing purposes */ 4706 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4707 #undef __FUNCT__ 4708 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4709 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4710 { 4711 PetscErrorCode ierr; 4712 PC_IS* pcis = (PC_IS*)(pc->data); 4713 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4714 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4715 /* one and zero */ 4716 PetscScalar one=1.0,zero=0.0; 4717 /* space to store constraints and their local indices */ 4718 PetscScalar *constraints_data; 4719 PetscInt *constraints_idxs,*constraints_idxs_B; 4720 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4721 PetscInt *constraints_n; 4722 /* iterators */ 4723 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4724 /* BLAS integers */ 4725 PetscBLASInt lwork,lierr; 4726 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4727 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4728 /* reuse */ 4729 PetscInt olocal_primal_size,olocal_primal_size_cc; 4730 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4731 /* change of basis */ 4732 PetscBool qr_needed; 4733 PetscBT change_basis,qr_needed_idx; 4734 /* auxiliary stuff */ 4735 PetscInt *nnz,*is_indices; 4736 PetscInt ncc; 4737 /* some quantities */ 4738 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4739 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4740 4741 PetscFunctionBegin; 4742 /* Destroy Mat objects computed previously */ 4743 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4744 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4745 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4746 /* save info on constraints from previous setup (if any) */ 4747 olocal_primal_size = pcbddc->local_primal_size; 4748 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4749 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4750 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4751 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4752 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4753 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4754 4755 if (!pcbddc->adaptive_selection) { 4756 IS ISForVertices,*ISForFaces,*ISForEdges; 4757 MatNullSpace nearnullsp; 4758 const Vec *nearnullvecs; 4759 Vec *localnearnullsp; 4760 PetscScalar *array; 4761 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4762 PetscBool nnsp_has_cnst; 4763 /* LAPACK working arrays for SVD or POD */ 4764 PetscBool skip_lapack,boolforchange; 4765 PetscScalar *work; 4766 PetscReal *singular_vals; 4767 #if defined(PETSC_USE_COMPLEX) 4768 PetscReal *rwork; 4769 #endif 4770 #if defined(PETSC_MISSING_LAPACK_GESVD) 4771 PetscScalar *temp_basis,*correlation_mat; 4772 #else 4773 PetscBLASInt dummy_int=1; 4774 PetscScalar dummy_scalar=1.; 4775 #endif 4776 4777 /* Get index sets for faces, edges and vertices from graph */ 4778 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4779 /* print some info */ 4780 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4781 PetscInt nv; 4782 4783 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4784 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4785 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4786 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4787 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4790 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4791 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4792 } 4793 4794 /* free unneeded index sets */ 4795 if (!pcbddc->use_vertices) { 4796 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4797 } 4798 if (!pcbddc->use_edges) { 4799 for (i=0;i<n_ISForEdges;i++) { 4800 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4801 } 4802 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4803 n_ISForEdges = 0; 4804 } 4805 if (!pcbddc->use_faces) { 4806 for (i=0;i<n_ISForFaces;i++) { 4807 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4808 } 4809 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4810 n_ISForFaces = 0; 4811 } 4812 4813 /* check if near null space is attached to global mat */ 4814 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4815 if (nearnullsp) { 4816 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4817 /* remove any stored info */ 4818 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4819 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4820 /* store information for BDDC solver reuse */ 4821 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4822 pcbddc->onearnullspace = nearnullsp; 4823 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4824 for (i=0;i<nnsp_size;i++) { 4825 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4826 } 4827 } else { /* if near null space is not provided BDDC uses constants by default */ 4828 nnsp_size = 0; 4829 nnsp_has_cnst = PETSC_TRUE; 4830 } 4831 /* get max number of constraints on a single cc */ 4832 max_constraints = nnsp_size; 4833 if (nnsp_has_cnst) max_constraints++; 4834 4835 /* 4836 Evaluate maximum storage size needed by the procedure 4837 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4838 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4839 There can be multiple constraints per connected component 4840 */ 4841 n_vertices = 0; 4842 if (ISForVertices) { 4843 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4844 } 4845 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4846 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4847 4848 total_counts = n_ISForFaces+n_ISForEdges; 4849 total_counts *= max_constraints; 4850 total_counts += n_vertices; 4851 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4852 4853 total_counts = 0; 4854 max_size_of_constraint = 0; 4855 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4856 IS used_is; 4857 if (i<n_ISForEdges) { 4858 used_is = ISForEdges[i]; 4859 } else { 4860 used_is = ISForFaces[i-n_ISForEdges]; 4861 } 4862 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4863 total_counts += j; 4864 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4865 } 4866 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4867 4868 /* get local part of global near null space vectors */ 4869 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4870 for (k=0;k<nnsp_size;k++) { 4871 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4872 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4873 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4874 } 4875 4876 /* whether or not to skip lapack calls */ 4877 skip_lapack = PETSC_TRUE; 4878 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4879 4880 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4881 if (!skip_lapack) { 4882 PetscScalar temp_work; 4883 4884 #if defined(PETSC_MISSING_LAPACK_GESVD) 4885 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4886 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4887 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4888 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4889 #if defined(PETSC_USE_COMPLEX) 4890 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4891 #endif 4892 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4893 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4894 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4895 lwork = -1; 4896 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4897 #if !defined(PETSC_USE_COMPLEX) 4898 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4899 #else 4900 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4901 #endif 4902 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4903 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4904 #else /* on missing GESVD */ 4905 /* SVD */ 4906 PetscInt max_n,min_n; 4907 max_n = max_size_of_constraint; 4908 min_n = max_constraints; 4909 if (max_size_of_constraint < max_constraints) { 4910 min_n = max_size_of_constraint; 4911 max_n = max_constraints; 4912 } 4913 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4914 #if defined(PETSC_USE_COMPLEX) 4915 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4916 #endif 4917 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4918 lwork = -1; 4919 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4920 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4921 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4922 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4923 #if !defined(PETSC_USE_COMPLEX) 4924 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4925 #else 4926 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4927 #endif 4928 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4929 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4930 #endif /* on missing GESVD */ 4931 /* Allocate optimal workspace */ 4932 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4933 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4934 } 4935 /* Now we can loop on constraining sets */ 4936 total_counts = 0; 4937 constraints_idxs_ptr[0] = 0; 4938 constraints_data_ptr[0] = 0; 4939 /* vertices */ 4940 if (n_vertices) { 4941 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4942 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4943 for (i=0;i<n_vertices;i++) { 4944 constraints_n[total_counts] = 1; 4945 constraints_data[total_counts] = 1.0; 4946 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4947 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4948 total_counts++; 4949 } 4950 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4951 n_vertices = total_counts; 4952 } 4953 4954 /* edges and faces */ 4955 total_counts_cc = total_counts; 4956 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4957 IS used_is; 4958 PetscBool idxs_copied = PETSC_FALSE; 4959 4960 if (ncc<n_ISForEdges) { 4961 used_is = ISForEdges[ncc]; 4962 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4963 } else { 4964 used_is = ISForFaces[ncc-n_ISForEdges]; 4965 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4966 } 4967 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4968 4969 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4970 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4971 /* change of basis should not be performed on local periodic nodes */ 4972 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4973 if (nnsp_has_cnst) { 4974 PetscScalar quad_value; 4975 4976 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4977 idxs_copied = PETSC_TRUE; 4978 4979 if (!pcbddc->use_nnsp_true) { 4980 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4981 } else { 4982 quad_value = 1.0; 4983 } 4984 for (j=0;j<size_of_constraint;j++) { 4985 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4986 } 4987 temp_constraints++; 4988 total_counts++; 4989 } 4990 for (k=0;k<nnsp_size;k++) { 4991 PetscReal real_value; 4992 PetscScalar *ptr_to_data; 4993 4994 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4995 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4996 for (j=0;j<size_of_constraint;j++) { 4997 ptr_to_data[j] = array[is_indices[j]]; 4998 } 4999 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5000 /* check if array is null on the connected component */ 5001 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5002 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5003 if (real_value > 0.0) { /* keep indices and values */ 5004 temp_constraints++; 5005 total_counts++; 5006 if (!idxs_copied) { 5007 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5008 idxs_copied = PETSC_TRUE; 5009 } 5010 } 5011 } 5012 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5013 valid_constraints = temp_constraints; 5014 if (!pcbddc->use_nnsp_true && temp_constraints) { 5015 if (temp_constraints == 1) { /* just normalize the constraint */ 5016 PetscScalar norm,*ptr_to_data; 5017 5018 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5020 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5021 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5022 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5023 } else { /* perform SVD */ 5024 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5025 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5026 5027 #if defined(PETSC_MISSING_LAPACK_GESVD) 5028 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5029 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5030 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5031 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5032 from that computed using LAPACKgesvd 5033 -> This is due to a different computation of eigenvectors in LAPACKheev 5034 -> The quality of the POD-computed basis will be the same */ 5035 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5036 /* Store upper triangular part of correlation matrix */ 5037 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5038 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5039 for (j=0;j<temp_constraints;j++) { 5040 for (k=0;k<j+1;k++) { 5041 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5042 } 5043 } 5044 /* compute eigenvalues and eigenvectors of correlation matrix */ 5045 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5046 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5047 #if !defined(PETSC_USE_COMPLEX) 5048 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5049 #else 5050 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5051 #endif 5052 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5053 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5054 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5055 j = 0; 5056 while (j < temp_constraints && singular_vals[j] < tol) j++; 5057 total_counts = total_counts-j; 5058 valid_constraints = temp_constraints-j; 5059 /* scale and copy POD basis into used quadrature memory */ 5060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5061 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5062 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5063 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5064 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5065 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5066 if (j<temp_constraints) { 5067 PetscInt ii; 5068 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5069 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5070 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5071 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5072 for (k=0;k<temp_constraints-j;k++) { 5073 for (ii=0;ii<size_of_constraint;ii++) { 5074 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5075 } 5076 } 5077 } 5078 #else /* on missing GESVD */ 5079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5080 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5081 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5082 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5083 #if !defined(PETSC_USE_COMPLEX) 5084 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5085 #else 5086 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5087 #endif 5088 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5089 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5090 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5091 k = temp_constraints; 5092 if (k > size_of_constraint) k = size_of_constraint; 5093 j = 0; 5094 while (j < k && singular_vals[k-j-1] < tol) j++; 5095 valid_constraints = k-j; 5096 total_counts = total_counts-temp_constraints+valid_constraints; 5097 #endif /* on missing GESVD */ 5098 } 5099 } 5100 /* update pointers information */ 5101 if (valid_constraints) { 5102 constraints_n[total_counts_cc] = valid_constraints; 5103 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5104 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5105 /* set change_of_basis flag */ 5106 if (boolforchange) { 5107 PetscBTSet(change_basis,total_counts_cc); 5108 } 5109 total_counts_cc++; 5110 } 5111 } 5112 /* free workspace */ 5113 if (!skip_lapack) { 5114 ierr = PetscFree(work);CHKERRQ(ierr); 5115 #if defined(PETSC_USE_COMPLEX) 5116 ierr = PetscFree(rwork);CHKERRQ(ierr); 5117 #endif 5118 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5119 #if defined(PETSC_MISSING_LAPACK_GESVD) 5120 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5121 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5122 #endif 5123 } 5124 for (k=0;k<nnsp_size;k++) { 5125 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5126 } 5127 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5128 /* free index sets of faces, edges and vertices */ 5129 for (i=0;i<n_ISForFaces;i++) { 5130 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5131 } 5132 if (n_ISForFaces) { 5133 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5134 } 5135 for (i=0;i<n_ISForEdges;i++) { 5136 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5137 } 5138 if (n_ISForEdges) { 5139 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5140 } 5141 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5142 } else { 5143 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5144 5145 total_counts = 0; 5146 n_vertices = 0; 5147 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5148 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5149 } 5150 max_constraints = 0; 5151 total_counts_cc = 0; 5152 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5153 total_counts += pcbddc->adaptive_constraints_n[i]; 5154 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5155 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5156 } 5157 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5158 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5159 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5160 constraints_data = pcbddc->adaptive_constraints_data; 5161 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5162 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5163 total_counts_cc = 0; 5164 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5165 if (pcbddc->adaptive_constraints_n[i]) { 5166 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5167 } 5168 } 5169 #if 0 5170 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5171 for (i=0;i<total_counts_cc;i++) { 5172 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5173 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5174 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5175 printf(" %d",constraints_idxs[j]); 5176 } 5177 printf("\n"); 5178 printf("number of cc: %d\n",constraints_n[i]); 5179 } 5180 for (i=0;i<n_vertices;i++) { 5181 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5182 } 5183 for (i=0;i<sub_schurs->n_subs;i++) { 5184 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5185 } 5186 #endif 5187 5188 max_size_of_constraint = 0; 5189 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5190 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5191 /* Change of basis */ 5192 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5193 if (pcbddc->use_change_of_basis) { 5194 for (i=0;i<sub_schurs->n_subs;i++) { 5195 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5196 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5197 } 5198 } 5199 } 5200 } 5201 pcbddc->local_primal_size = total_counts; 5202 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5203 5204 /* map constraints_idxs in boundary numbering */ 5205 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5206 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5207 5208 /* Create constraint matrix */ 5209 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5210 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5211 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5212 5213 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5214 /* determine if a QR strategy is needed for change of basis */ 5215 qr_needed = PETSC_FALSE; 5216 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5217 total_primal_vertices=0; 5218 pcbddc->local_primal_size_cc = 0; 5219 for (i=0;i<total_counts_cc;i++) { 5220 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5221 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5222 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5223 pcbddc->local_primal_size_cc += 1; 5224 } else if (PetscBTLookup(change_basis,i)) { 5225 for (k=0;k<constraints_n[i];k++) { 5226 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5227 } 5228 pcbddc->local_primal_size_cc += constraints_n[i]; 5229 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5230 PetscBTSet(qr_needed_idx,i); 5231 qr_needed = PETSC_TRUE; 5232 } 5233 } else { 5234 pcbddc->local_primal_size_cc += 1; 5235 } 5236 } 5237 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5238 pcbddc->n_vertices = total_primal_vertices; 5239 /* permute indices in order to have a sorted set of vertices */ 5240 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5241 5242 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5243 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5244 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5245 5246 /* nonzero structure of constraint matrix */ 5247 /* and get reference dof for local constraints */ 5248 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5249 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5250 5251 j = total_primal_vertices; 5252 total_counts = total_primal_vertices; 5253 cum = total_primal_vertices; 5254 for (i=n_vertices;i<total_counts_cc;i++) { 5255 if (!PetscBTLookup(change_basis,i)) { 5256 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5257 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5258 cum++; 5259 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5260 for (k=0;k<constraints_n[i];k++) { 5261 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5262 nnz[j+k] = size_of_constraint; 5263 } 5264 j += constraints_n[i]; 5265 } 5266 } 5267 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5268 ierr = PetscFree(nnz);CHKERRQ(ierr); 5269 5270 /* set values in constraint matrix */ 5271 for (i=0;i<total_primal_vertices;i++) { 5272 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5273 } 5274 total_counts = total_primal_vertices; 5275 for (i=n_vertices;i<total_counts_cc;i++) { 5276 if (!PetscBTLookup(change_basis,i)) { 5277 PetscInt *cols; 5278 5279 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5280 cols = constraints_idxs+constraints_idxs_ptr[i]; 5281 for (k=0;k<constraints_n[i];k++) { 5282 PetscInt row = total_counts+k; 5283 PetscScalar *vals; 5284 5285 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5286 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5287 } 5288 total_counts += constraints_n[i]; 5289 } 5290 } 5291 /* assembling */ 5292 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5293 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5294 5295 /* 5296 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5297 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5298 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5299 */ 5300 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5301 if (pcbddc->use_change_of_basis) { 5302 /* dual and primal dofs on a single cc */ 5303 PetscInt dual_dofs,primal_dofs; 5304 /* working stuff for GEQRF */ 5305 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5306 PetscBLASInt lqr_work; 5307 /* working stuff for UNGQR */ 5308 PetscScalar *gqr_work,lgqr_work_t; 5309 PetscBLASInt lgqr_work; 5310 /* working stuff for TRTRS */ 5311 PetscScalar *trs_rhs; 5312 PetscBLASInt Blas_NRHS; 5313 /* pointers for values insertion into change of basis matrix */ 5314 PetscInt *start_rows,*start_cols; 5315 PetscScalar *start_vals; 5316 /* working stuff for values insertion */ 5317 PetscBT is_primal; 5318 PetscInt *aux_primal_numbering_B; 5319 /* matrix sizes */ 5320 PetscInt global_size,local_size; 5321 /* temporary change of basis */ 5322 Mat localChangeOfBasisMatrix; 5323 /* extra space for debugging */ 5324 PetscScalar *dbg_work; 5325 5326 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5327 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5328 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5329 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5330 /* nonzeros for local mat */ 5331 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5332 if (!pcbddc->benign_change || pcbddc->fake_change) { 5333 for (i=0;i<pcis->n;i++) nnz[i]=1; 5334 } else { 5335 const PetscInt *ii; 5336 PetscInt n; 5337 PetscBool flg_row; 5338 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5339 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5340 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5341 } 5342 for (i=n_vertices;i<total_counts_cc;i++) { 5343 if (PetscBTLookup(change_basis,i)) { 5344 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5345 if (PetscBTLookup(qr_needed_idx,i)) { 5346 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5347 } else { 5348 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5349 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5350 } 5351 } 5352 } 5353 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5354 ierr = PetscFree(nnz);CHKERRQ(ierr); 5355 /* Set interior change in the matrix */ 5356 if (!pcbddc->benign_change || pcbddc->fake_change) { 5357 for (i=0;i<pcis->n;i++) { 5358 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5359 } 5360 } else { 5361 const PetscInt *ii,*jj; 5362 PetscScalar *aa; 5363 PetscInt n; 5364 PetscBool flg_row; 5365 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5366 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5367 for (i=0;i<n;i++) { 5368 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5369 } 5370 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5371 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5372 } 5373 5374 if (pcbddc->dbg_flag) { 5375 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5376 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5377 } 5378 5379 5380 /* Now we loop on the constraints which need a change of basis */ 5381 /* 5382 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5383 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5384 5385 Basic blocks of change of basis matrix T computed by 5386 5387 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5388 5389 | 1 0 ... 0 s_1/S | 5390 | 0 1 ... 0 s_2/S | 5391 | ... | 5392 | 0 ... 1 s_{n-1}/S | 5393 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5394 5395 with S = \sum_{i=1}^n s_i^2 5396 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5397 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5398 5399 - QR decomposition of constraints otherwise 5400 */ 5401 if (qr_needed) { 5402 /* space to store Q */ 5403 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5404 /* array to store scaling factors for reflectors */ 5405 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5406 /* first we issue queries for optimal work */ 5407 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5408 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5409 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5410 lqr_work = -1; 5411 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5412 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5413 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5414 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5415 lgqr_work = -1; 5416 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5417 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5418 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5419 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5420 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5421 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5422 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5423 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5424 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5425 /* array to store rhs and solution of triangular solver */ 5426 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5427 /* allocating workspace for check */ 5428 if (pcbddc->dbg_flag) { 5429 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5430 } 5431 } 5432 /* array to store whether a node is primal or not */ 5433 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5434 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5435 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5436 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5437 for (i=0;i<total_primal_vertices;i++) { 5438 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5439 } 5440 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5441 5442 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5443 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5444 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5445 if (PetscBTLookup(change_basis,total_counts)) { 5446 /* get constraint info */ 5447 primal_dofs = constraints_n[total_counts]; 5448 dual_dofs = size_of_constraint-primal_dofs; 5449 5450 if (pcbddc->dbg_flag) { 5451 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5452 } 5453 5454 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5455 5456 /* copy quadrature constraints for change of basis check */ 5457 if (pcbddc->dbg_flag) { 5458 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5459 } 5460 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5461 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5462 5463 /* compute QR decomposition of constraints */ 5464 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5465 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5466 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5467 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5468 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5469 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5470 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5471 5472 /* explictly compute R^-T */ 5473 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5474 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5475 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5476 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5477 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5478 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5479 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5480 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5481 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5482 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5483 5484 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5485 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5487 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5488 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5489 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5490 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5491 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5492 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5493 5494 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5495 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5496 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5497 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5498 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5499 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5501 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5502 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5503 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5504 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5505 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5506 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5507 5508 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5509 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5510 /* insert cols for primal dofs */ 5511 for (j=0;j<primal_dofs;j++) { 5512 start_vals = &qr_basis[j*size_of_constraint]; 5513 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5514 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5515 } 5516 /* insert cols for dual dofs */ 5517 for (j=0,k=0;j<dual_dofs;k++) { 5518 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5519 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5520 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5521 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5522 j++; 5523 } 5524 } 5525 5526 /* check change of basis */ 5527 if (pcbddc->dbg_flag) { 5528 PetscInt ii,jj; 5529 PetscBool valid_qr=PETSC_TRUE; 5530 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5531 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5535 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5536 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5537 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5538 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5539 for (jj=0;jj<size_of_constraint;jj++) { 5540 for (ii=0;ii<primal_dofs;ii++) { 5541 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5542 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5543 } 5544 } 5545 if (!valid_qr) { 5546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5547 for (jj=0;jj<size_of_constraint;jj++) { 5548 for (ii=0;ii<primal_dofs;ii++) { 5549 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5550 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5551 } 5552 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5553 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5554 } 5555 } 5556 } 5557 } else { 5558 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5559 } 5560 } 5561 } else { /* simple transformation block */ 5562 PetscInt row,col; 5563 PetscScalar val,norm; 5564 5565 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5566 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5567 for (j=0;j<size_of_constraint;j++) { 5568 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5569 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5570 if (!PetscBTLookup(is_primal,row_B)) { 5571 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5572 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5573 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5574 } else { 5575 for (k=0;k<size_of_constraint;k++) { 5576 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5577 if (row != col) { 5578 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5579 } else { 5580 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5581 } 5582 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5583 } 5584 } 5585 } 5586 if (pcbddc->dbg_flag) { 5587 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5588 } 5589 } 5590 } else { 5591 if (pcbddc->dbg_flag) { 5592 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5593 } 5594 } 5595 } 5596 5597 /* free workspace */ 5598 if (qr_needed) { 5599 if (pcbddc->dbg_flag) { 5600 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5601 } 5602 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5603 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5604 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5605 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5606 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5607 } 5608 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5609 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5610 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5611 5612 /* assembling of global change of variable */ 5613 if (!pcbddc->fake_change) { 5614 Mat tmat; 5615 PetscInt bs; 5616 5617 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5618 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5619 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5620 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5621 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5622 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5623 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5624 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5625 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5626 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5627 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5628 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5629 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5630 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5631 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5632 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5633 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5634 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5635 5636 /* check */ 5637 if (pcbddc->dbg_flag) { 5638 PetscReal error; 5639 Vec x,x_change; 5640 5641 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5642 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5643 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5644 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5645 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5646 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5647 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5648 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5649 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5650 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5651 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5652 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5653 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5654 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5655 ierr = VecDestroy(&x);CHKERRQ(ierr); 5656 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5657 } 5658 /* adapt sub_schurs computed (if any) */ 5659 if (pcbddc->use_deluxe_scaling) { 5660 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5661 5662 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5663 if (sub_schurs && sub_schurs->S_Ej_all) { 5664 Mat S_new,tmat; 5665 IS is_all_N,is_V_Sall = NULL; 5666 5667 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5668 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5669 if (pcbddc->deluxe_zerorows) { 5670 ISLocalToGlobalMapping NtoSall; 5671 IS is_V; 5672 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5673 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5674 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5675 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5676 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5677 } 5678 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5679 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5680 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5681 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5682 if (pcbddc->deluxe_zerorows) { 5683 const PetscScalar *array; 5684 const PetscInt *idxs_V,*idxs_all; 5685 PetscInt i,n_V; 5686 5687 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5688 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5689 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5690 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5691 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5692 for (i=0;i<n_V;i++) { 5693 PetscScalar val; 5694 PetscInt idx; 5695 5696 idx = idxs_V[i]; 5697 val = array[idxs_all[idxs_V[i]]]; 5698 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5699 } 5700 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5702 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5703 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5704 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5705 } 5706 sub_schurs->S_Ej_all = S_new; 5707 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5708 if (sub_schurs->sum_S_Ej_all) { 5709 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5710 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5711 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5712 if (pcbddc->deluxe_zerorows) { 5713 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5714 } 5715 sub_schurs->sum_S_Ej_all = S_new; 5716 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5717 } 5718 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5719 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5720 } 5721 /* destroy any change of basis context in sub_schurs */ 5722 if (sub_schurs && sub_schurs->change) { 5723 PetscInt i; 5724 5725 for (i=0;i<sub_schurs->n_subs;i++) { 5726 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5727 } 5728 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5729 } 5730 } 5731 if (pcbddc->switch_static) { /* need to save the local change */ 5732 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5733 } else { 5734 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5735 } 5736 /* determine if any process has changed the pressures locally */ 5737 pcbddc->change_interior = pcbddc->benign_have_null; 5738 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5739 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5740 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5741 pcbddc->use_qr_single = qr_needed; 5742 } 5743 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5744 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5745 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5746 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5747 } else { 5748 Mat benign_global = NULL; 5749 if (pcbddc->benign_have_null) { 5750 Mat tmat; 5751 5752 pcbddc->change_interior = PETSC_TRUE; 5753 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5754 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5755 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5756 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5757 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5758 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5759 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5760 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5761 if (pcbddc->benign_change) { 5762 Mat M; 5763 5764 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5765 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5766 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5767 ierr = MatDestroy(&M);CHKERRQ(ierr); 5768 } else { 5769 Mat eye; 5770 PetscScalar *array; 5771 5772 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5773 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5774 for (i=0;i<pcis->n;i++) { 5775 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5776 } 5777 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5778 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5779 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5780 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5781 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5782 } 5783 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5784 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5785 } 5786 if (pcbddc->user_ChangeOfBasisMatrix) { 5787 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5788 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5789 } else if (pcbddc->benign_have_null) { 5790 pcbddc->ChangeOfBasisMatrix = benign_global; 5791 } 5792 } 5793 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5794 IS is_global; 5795 const PetscInt *gidxs; 5796 5797 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5798 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5799 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5800 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5801 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5802 } 5803 } 5804 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5805 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5806 } 5807 5808 if (!pcbddc->fake_change) { 5809 /* add pressure dofs to set of primal nodes for numbering purposes */ 5810 for (i=0;i<pcbddc->benign_n;i++) { 5811 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5812 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5813 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5814 pcbddc->local_primal_size_cc++; 5815 pcbddc->local_primal_size++; 5816 } 5817 5818 /* check if a new primal space has been introduced (also take into account benign trick) */ 5819 pcbddc->new_primal_space_local = PETSC_TRUE; 5820 if (olocal_primal_size == pcbddc->local_primal_size) { 5821 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5822 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5823 if (!pcbddc->new_primal_space_local) { 5824 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5825 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5826 } 5827 } 5828 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5829 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5830 } 5831 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5832 5833 /* flush dbg viewer */ 5834 if (pcbddc->dbg_flag) { 5835 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5836 } 5837 5838 /* free workspace */ 5839 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5840 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5841 if (!pcbddc->adaptive_selection) { 5842 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5843 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5844 } else { 5845 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5846 pcbddc->adaptive_constraints_idxs_ptr, 5847 pcbddc->adaptive_constraints_data_ptr, 5848 pcbddc->adaptive_constraints_idxs, 5849 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5850 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5851 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5852 } 5853 PetscFunctionReturn(0); 5854 } 5855 5856 #undef __FUNCT__ 5857 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5858 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5859 { 5860 ISLocalToGlobalMapping map; 5861 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5862 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5863 PetscInt ierr,i,N; 5864 5865 PetscFunctionBegin; 5866 if (pcbddc->graphanalyzed) PetscFunctionReturn(0); 5867 /* Reset previously computed graph */ 5868 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5869 /* Init local Graph struct */ 5870 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5871 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5872 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5873 5874 /* Check validity of the csr graph passed in by the user */ 5875 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5876 5877 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5878 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5879 PetscInt *xadj,*adjncy; 5880 PetscInt nvtxs; 5881 PetscBool flg_row=PETSC_FALSE; 5882 5883 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5884 if (flg_row) { 5885 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5886 pcbddc->computed_rowadj = PETSC_TRUE; 5887 } 5888 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5889 } 5890 if (pcbddc->dbg_flag) { 5891 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5892 } 5893 5894 /* Setup of Graph */ 5895 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5896 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5897 5898 /* attach info on disconnected subdomains if present */ 5899 if (pcbddc->n_local_subs) { 5900 PetscInt *local_subs; 5901 5902 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5903 for (i=0;i<pcbddc->n_local_subs;i++) { 5904 const PetscInt *idxs; 5905 PetscInt nl,j; 5906 5907 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5908 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5909 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5910 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5911 } 5912 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5913 pcbddc->mat_graph->local_subs = local_subs; 5914 } 5915 5916 /* Graph's connected components analysis */ 5917 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5918 5919 /* set flag indicating analysis has been done */ 5920 pcbddc->graphanalyzed = PETSC_TRUE; 5921 PetscFunctionReturn(0); 5922 } 5923 5924 #undef __FUNCT__ 5925 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5926 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5927 { 5928 PetscInt i,j; 5929 PetscScalar *alphas; 5930 PetscErrorCode ierr; 5931 5932 PetscFunctionBegin; 5933 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5934 for (i=0;i<n;i++) { 5935 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5936 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5937 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5938 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5939 } 5940 ierr = PetscFree(alphas);CHKERRQ(ierr); 5941 PetscFunctionReturn(0); 5942 } 5943 5944 #undef __FUNCT__ 5945 #define __FUNCT__ "MatISGetSubassemblingPattern" 5946 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5947 { 5948 Mat A; 5949 PetscInt n_neighs,*neighs,*n_shared,**shared; 5950 PetscMPIInt size,rank,color; 5951 PetscInt *xadj,*adjncy; 5952 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5953 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5954 PetscInt void_procs,*procs_candidates = NULL; 5955 PetscInt xadj_count, *count; 5956 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5957 PetscSubcomm psubcomm; 5958 MPI_Comm subcomm; 5959 PetscErrorCode ierr; 5960 5961 PetscFunctionBegin; 5962 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5963 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5964 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5965 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5966 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5967 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5968 5969 if (have_void) *have_void = PETSC_FALSE; 5970 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5971 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5972 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5973 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5974 im_active = !!(n); 5975 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5976 void_procs = size - active_procs; 5977 /* get ranks of of non-active processes in mat communicator */ 5978 if (void_procs) { 5979 PetscInt ncand; 5980 5981 if (have_void) *have_void = PETSC_TRUE; 5982 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5983 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5984 for (i=0,ncand=0;i<size;i++) { 5985 if (!procs_candidates[i]) { 5986 procs_candidates[ncand++] = i; 5987 } 5988 } 5989 /* force n_subdomains to be not greater that the number of non-active processes */ 5990 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5991 } 5992 5993 /* number of subdomains requested greater than active processes -> just shift the matrix 5994 number of subdomains requested 1 -> send to master or first candidate in voids */ 5995 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5996 PetscInt issize,isidx,dest; 5997 if (*n_subdomains == 1) dest = 0; 5998 else dest = rank; 5999 if (im_active) { 6000 issize = 1; 6001 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6002 isidx = procs_candidates[dest]; 6003 } else { 6004 isidx = dest; 6005 } 6006 } else { 6007 issize = 0; 6008 isidx = -1; 6009 } 6010 *n_subdomains = active_procs; 6011 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6012 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6013 PetscFunctionReturn(0); 6014 } 6015 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6016 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6017 threshold = PetscMax(threshold,2); 6018 6019 /* Get info on mapping */ 6020 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6021 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6022 6023 /* build local CSR graph of subdomains' connectivity */ 6024 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6025 xadj[0] = 0; 6026 xadj[1] = PetscMax(n_neighs-1,0); 6027 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6028 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6029 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6030 for (i=1;i<n_neighs;i++) 6031 for (j=0;j<n_shared[i];j++) 6032 count[shared[i][j]] += 1; 6033 6034 xadj_count = 0; 6035 for (i=1;i<n_neighs;i++) { 6036 for (j=0;j<n_shared[i];j++) { 6037 if (count[shared[i][j]] < threshold) { 6038 adjncy[xadj_count] = neighs[i]; 6039 adjncy_wgt[xadj_count] = n_shared[i]; 6040 xadj_count++; 6041 break; 6042 } 6043 } 6044 } 6045 xadj[1] = xadj_count; 6046 ierr = PetscFree(count);CHKERRQ(ierr); 6047 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6048 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6049 6050 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6051 6052 /* Restrict work on active processes only */ 6053 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6054 if (void_procs) { 6055 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6056 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6057 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6058 subcomm = PetscSubcommChild(psubcomm); 6059 } else { 6060 psubcomm = NULL; 6061 subcomm = PetscObjectComm((PetscObject)mat); 6062 } 6063 6064 v_wgt = NULL; 6065 if (!color) { 6066 ierr = PetscFree(xadj);CHKERRQ(ierr); 6067 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6068 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6069 } else { 6070 Mat subdomain_adj; 6071 IS new_ranks,new_ranks_contig; 6072 MatPartitioning partitioner; 6073 PetscInt rstart=0,rend=0; 6074 PetscInt *is_indices,*oldranks; 6075 PetscMPIInt size; 6076 PetscBool aggregate; 6077 6078 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6079 if (void_procs) { 6080 PetscInt prank = rank; 6081 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6082 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6083 for (i=0;i<xadj[1];i++) { 6084 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6085 } 6086 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6087 } else { 6088 oldranks = NULL; 6089 } 6090 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6091 if (aggregate) { /* TODO: all this part could be made more efficient */ 6092 PetscInt lrows,row,ncols,*cols; 6093 PetscMPIInt nrank; 6094 PetscScalar *vals; 6095 6096 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6097 lrows = 0; 6098 if (nrank<redprocs) { 6099 lrows = size/redprocs; 6100 if (nrank<size%redprocs) lrows++; 6101 } 6102 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6103 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6104 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6105 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6106 row = nrank; 6107 ncols = xadj[1]-xadj[0]; 6108 cols = adjncy; 6109 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6110 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6111 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6112 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6113 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6114 ierr = PetscFree(xadj);CHKERRQ(ierr); 6115 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6116 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6117 ierr = PetscFree(vals);CHKERRQ(ierr); 6118 if (use_vwgt) { 6119 Vec v; 6120 const PetscScalar *array; 6121 PetscInt nl; 6122 6123 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6124 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6125 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6126 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6127 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6128 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6129 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6130 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6131 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6132 ierr = VecDestroy(&v);CHKERRQ(ierr); 6133 } 6134 } else { 6135 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6136 if (use_vwgt) { 6137 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6138 v_wgt[0] = local_size; 6139 } 6140 } 6141 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6142 6143 /* Partition */ 6144 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6145 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6146 if (v_wgt) { 6147 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6148 } 6149 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6150 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6151 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6152 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6153 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6154 6155 /* renumber new_ranks to avoid "holes" in new set of processors */ 6156 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6157 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6158 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6159 if (!aggregate) { 6160 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6161 #if defined(PETSC_USE_DEBUG) 6162 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6163 #endif 6164 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6165 } else if (oldranks) { 6166 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6167 } else { 6168 ranks_send_to_idx[0] = is_indices[0]; 6169 } 6170 } else { 6171 PetscInt idxs[1]; 6172 PetscMPIInt tag; 6173 MPI_Request *reqs; 6174 6175 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6176 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6177 for (i=rstart;i<rend;i++) { 6178 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6179 } 6180 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6181 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6182 ierr = PetscFree(reqs);CHKERRQ(ierr); 6183 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6184 #if defined(PETSC_USE_DEBUG) 6185 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6186 #endif 6187 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6188 } else if (oldranks) { 6189 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6190 } else { 6191 ranks_send_to_idx[0] = idxs[0]; 6192 } 6193 } 6194 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6195 /* clean up */ 6196 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6197 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6198 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6199 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6200 } 6201 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6202 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6203 6204 /* assemble parallel IS for sends */ 6205 i = 1; 6206 if (!color) i=0; 6207 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6208 PetscFunctionReturn(0); 6209 } 6210 6211 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6212 6213 #undef __FUNCT__ 6214 #define __FUNCT__ "MatISSubassemble" 6215 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6216 { 6217 Mat local_mat; 6218 IS is_sends_internal; 6219 PetscInt rows,cols,new_local_rows; 6220 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6221 PetscBool ismatis,isdense,newisdense,destroy_mat; 6222 ISLocalToGlobalMapping l2gmap; 6223 PetscInt* l2gmap_indices; 6224 const PetscInt* is_indices; 6225 MatType new_local_type; 6226 /* buffers */ 6227 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6228 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6229 PetscInt *recv_buffer_idxs_local; 6230 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6231 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6232 /* MPI */ 6233 MPI_Comm comm,comm_n; 6234 PetscSubcomm subcomm; 6235 PetscMPIInt n_sends,n_recvs,commsize; 6236 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6237 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6238 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6239 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6240 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6241 PetscErrorCode ierr; 6242 6243 PetscFunctionBegin; 6244 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6245 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6246 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6247 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6248 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6249 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6250 PetscValidLogicalCollectiveBool(mat,reuse,6); 6251 PetscValidLogicalCollectiveInt(mat,nis,8); 6252 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6253 if (nvecs) { 6254 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6255 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6256 } 6257 /* further checks */ 6258 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6259 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6260 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6261 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6262 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6263 if (reuse && *mat_n) { 6264 PetscInt mrows,mcols,mnrows,mncols; 6265 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6266 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6267 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6268 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6269 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6270 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6271 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6272 } 6273 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6274 PetscValidLogicalCollectiveInt(mat,bs,0); 6275 6276 /* prepare IS for sending if not provided */ 6277 if (!is_sends) { 6278 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6279 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6280 } else { 6281 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6282 is_sends_internal = is_sends; 6283 } 6284 6285 /* get comm */ 6286 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6287 6288 /* compute number of sends */ 6289 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6290 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6291 6292 /* compute number of receives */ 6293 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6294 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6295 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6296 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6297 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6298 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6299 ierr = PetscFree(iflags);CHKERRQ(ierr); 6300 6301 /* restrict comm if requested */ 6302 subcomm = 0; 6303 destroy_mat = PETSC_FALSE; 6304 if (restrict_comm) { 6305 PetscMPIInt color,subcommsize; 6306 6307 color = 0; 6308 if (restrict_full) { 6309 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6310 } else { 6311 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6312 } 6313 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6314 subcommsize = commsize - subcommsize; 6315 /* check if reuse has been requested */ 6316 if (reuse) { 6317 if (*mat_n) { 6318 PetscMPIInt subcommsize2; 6319 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6320 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6321 comm_n = PetscObjectComm((PetscObject)*mat_n); 6322 } else { 6323 comm_n = PETSC_COMM_SELF; 6324 } 6325 } else { /* MAT_INITIAL_MATRIX */ 6326 PetscMPIInt rank; 6327 6328 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6329 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6330 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6331 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6332 comm_n = PetscSubcommChild(subcomm); 6333 } 6334 /* flag to destroy *mat_n if not significative */ 6335 if (color) destroy_mat = PETSC_TRUE; 6336 } else { 6337 comm_n = comm; 6338 } 6339 6340 /* prepare send/receive buffers */ 6341 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6342 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6343 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6344 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6345 if (nis) { 6346 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6347 } 6348 6349 /* Get data from local matrices */ 6350 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6351 /* TODO: See below some guidelines on how to prepare the local buffers */ 6352 /* 6353 send_buffer_vals should contain the raw values of the local matrix 6354 send_buffer_idxs should contain: 6355 - MatType_PRIVATE type 6356 - PetscInt size_of_l2gmap 6357 - PetscInt global_row_indices[size_of_l2gmap] 6358 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6359 */ 6360 else { 6361 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6362 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6363 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6364 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6365 send_buffer_idxs[1] = i; 6366 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6367 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6368 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6369 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6370 for (i=0;i<n_sends;i++) { 6371 ilengths_vals[is_indices[i]] = len*len; 6372 ilengths_idxs[is_indices[i]] = len+2; 6373 } 6374 } 6375 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6376 /* additional is (if any) */ 6377 if (nis) { 6378 PetscMPIInt psum; 6379 PetscInt j; 6380 for (j=0,psum=0;j<nis;j++) { 6381 PetscInt plen; 6382 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6383 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6384 psum += len+1; /* indices + lenght */ 6385 } 6386 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6387 for (j=0,psum=0;j<nis;j++) { 6388 PetscInt plen; 6389 const PetscInt *is_array_idxs; 6390 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6391 send_buffer_idxs_is[psum] = plen; 6392 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6393 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6394 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6395 psum += plen+1; /* indices + lenght */ 6396 } 6397 for (i=0;i<n_sends;i++) { 6398 ilengths_idxs_is[is_indices[i]] = psum; 6399 } 6400 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6401 } 6402 6403 buf_size_idxs = 0; 6404 buf_size_vals = 0; 6405 buf_size_idxs_is = 0; 6406 buf_size_vecs = 0; 6407 for (i=0;i<n_recvs;i++) { 6408 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6409 buf_size_vals += (PetscInt)olengths_vals[i]; 6410 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6411 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6412 } 6413 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6414 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6415 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6416 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6417 6418 /* get new tags for clean communications */ 6419 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6420 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6421 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6422 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6423 6424 /* allocate for requests */ 6425 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6426 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6427 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6428 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6429 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6430 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6431 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6432 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6433 6434 /* communications */ 6435 ptr_idxs = recv_buffer_idxs; 6436 ptr_vals = recv_buffer_vals; 6437 ptr_idxs_is = recv_buffer_idxs_is; 6438 ptr_vecs = recv_buffer_vecs; 6439 for (i=0;i<n_recvs;i++) { 6440 source_dest = onodes[i]; 6441 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6442 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6443 ptr_idxs += olengths_idxs[i]; 6444 ptr_vals += olengths_vals[i]; 6445 if (nis) { 6446 source_dest = onodes_is[i]; 6447 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6448 ptr_idxs_is += olengths_idxs_is[i]; 6449 } 6450 if (nvecs) { 6451 source_dest = onodes[i]; 6452 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6453 ptr_vecs += olengths_idxs[i]-2; 6454 } 6455 } 6456 for (i=0;i<n_sends;i++) { 6457 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6458 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6459 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6460 if (nis) { 6461 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6462 } 6463 if (nvecs) { 6464 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6465 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6466 } 6467 } 6468 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6469 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6470 6471 /* assemble new l2g map */ 6472 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6473 ptr_idxs = recv_buffer_idxs; 6474 new_local_rows = 0; 6475 for (i=0;i<n_recvs;i++) { 6476 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6477 ptr_idxs += olengths_idxs[i]; 6478 } 6479 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6480 ptr_idxs = recv_buffer_idxs; 6481 new_local_rows = 0; 6482 for (i=0;i<n_recvs;i++) { 6483 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6484 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6485 ptr_idxs += olengths_idxs[i]; 6486 } 6487 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6488 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6489 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6490 6491 /* infer new local matrix type from received local matrices type */ 6492 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6493 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6494 if (n_recvs) { 6495 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6496 ptr_idxs = recv_buffer_idxs; 6497 for (i=0;i<n_recvs;i++) { 6498 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6499 new_local_type_private = MATAIJ_PRIVATE; 6500 break; 6501 } 6502 ptr_idxs += olengths_idxs[i]; 6503 } 6504 switch (new_local_type_private) { 6505 case MATDENSE_PRIVATE: 6506 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6507 new_local_type = MATSEQAIJ; 6508 bs = 1; 6509 } else { /* if I receive only 1 dense matrix */ 6510 new_local_type = MATSEQDENSE; 6511 bs = 1; 6512 } 6513 break; 6514 case MATAIJ_PRIVATE: 6515 new_local_type = MATSEQAIJ; 6516 bs = 1; 6517 break; 6518 case MATBAIJ_PRIVATE: 6519 new_local_type = MATSEQBAIJ; 6520 break; 6521 case MATSBAIJ_PRIVATE: 6522 new_local_type = MATSEQSBAIJ; 6523 break; 6524 default: 6525 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6526 break; 6527 } 6528 } else { /* by default, new_local_type is seqdense */ 6529 new_local_type = MATSEQDENSE; 6530 bs = 1; 6531 } 6532 6533 /* create MATIS object if needed */ 6534 if (!reuse) { 6535 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6536 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6537 } else { 6538 /* it also destroys the local matrices */ 6539 if (*mat_n) { 6540 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6541 } else { /* this is a fake object */ 6542 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6543 } 6544 } 6545 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6546 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6547 6548 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6549 6550 /* Global to local map of received indices */ 6551 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6552 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6553 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6554 6555 /* restore attributes -> type of incoming data and its size */ 6556 buf_size_idxs = 0; 6557 for (i=0;i<n_recvs;i++) { 6558 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6559 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6560 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6561 } 6562 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6563 6564 /* set preallocation */ 6565 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6566 if (!newisdense) { 6567 PetscInt *new_local_nnz=0; 6568 6569 ptr_idxs = recv_buffer_idxs_local; 6570 if (n_recvs) { 6571 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6572 } 6573 for (i=0;i<n_recvs;i++) { 6574 PetscInt j; 6575 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6576 for (j=0;j<*(ptr_idxs+1);j++) { 6577 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6578 } 6579 } else { 6580 /* TODO */ 6581 } 6582 ptr_idxs += olengths_idxs[i]; 6583 } 6584 if (new_local_nnz) { 6585 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6586 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6587 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6588 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6589 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6590 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6591 } else { 6592 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6593 } 6594 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6595 } else { 6596 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6597 } 6598 6599 /* set values */ 6600 ptr_vals = recv_buffer_vals; 6601 ptr_idxs = recv_buffer_idxs_local; 6602 for (i=0;i<n_recvs;i++) { 6603 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6604 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6605 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6606 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6607 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6608 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6609 } else { 6610 /* TODO */ 6611 } 6612 ptr_idxs += olengths_idxs[i]; 6613 ptr_vals += olengths_vals[i]; 6614 } 6615 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6616 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6617 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6618 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6619 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6620 6621 #if 0 6622 if (!restrict_comm) { /* check */ 6623 Vec lvec,rvec; 6624 PetscReal infty_error; 6625 6626 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6627 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6628 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6629 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6630 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6631 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6632 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6633 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6634 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6635 } 6636 #endif 6637 6638 /* assemble new additional is (if any) */ 6639 if (nis) { 6640 PetscInt **temp_idxs,*count_is,j,psum; 6641 6642 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6643 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6644 ptr_idxs = recv_buffer_idxs_is; 6645 psum = 0; 6646 for (i=0;i<n_recvs;i++) { 6647 for (j=0;j<nis;j++) { 6648 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6649 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6650 psum += plen; 6651 ptr_idxs += plen+1; /* shift pointer to received data */ 6652 } 6653 } 6654 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6655 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6656 for (i=1;i<nis;i++) { 6657 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6658 } 6659 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6660 ptr_idxs = recv_buffer_idxs_is; 6661 for (i=0;i<n_recvs;i++) { 6662 for (j=0;j<nis;j++) { 6663 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6664 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6665 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6666 ptr_idxs += plen+1; /* shift pointer to received data */ 6667 } 6668 } 6669 for (i=0;i<nis;i++) { 6670 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6671 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6672 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6673 } 6674 ierr = PetscFree(count_is);CHKERRQ(ierr); 6675 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6676 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6677 } 6678 /* free workspace */ 6679 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6680 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6681 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6682 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6683 if (isdense) { 6684 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6685 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6686 } else { 6687 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6688 } 6689 if (nis) { 6690 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6691 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6692 } 6693 6694 if (nvecs) { 6695 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6696 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6697 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6698 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6699 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6700 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6701 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6702 /* set values */ 6703 ptr_vals = recv_buffer_vecs; 6704 ptr_idxs = recv_buffer_idxs_local; 6705 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6706 for (i=0;i<n_recvs;i++) { 6707 PetscInt j; 6708 for (j=0;j<*(ptr_idxs+1);j++) { 6709 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6710 } 6711 ptr_idxs += olengths_idxs[i]; 6712 ptr_vals += olengths_idxs[i]-2; 6713 } 6714 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6715 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6716 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6717 } 6718 6719 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6720 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6721 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6722 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6723 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6724 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6725 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6726 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6727 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6728 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6729 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6730 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6731 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6732 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6733 ierr = PetscFree(onodes);CHKERRQ(ierr); 6734 if (nis) { 6735 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6736 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6737 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6738 } 6739 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6740 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6741 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6742 for (i=0;i<nis;i++) { 6743 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6744 } 6745 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6746 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6747 } 6748 *mat_n = NULL; 6749 } 6750 PetscFunctionReturn(0); 6751 } 6752 6753 /* temporary hack into ksp private data structure */ 6754 #include <petsc/private/kspimpl.h> 6755 6756 #undef __FUNCT__ 6757 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6758 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6759 { 6760 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6761 PC_IS *pcis = (PC_IS*)pc->data; 6762 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6763 Mat coarsedivudotp = NULL; 6764 MatNullSpace CoarseNullSpace = NULL; 6765 ISLocalToGlobalMapping coarse_islg; 6766 IS coarse_is,*isarray; 6767 PetscInt i,im_active=-1,active_procs=-1; 6768 PetscInt nis,nisdofs,nisneu,nisvert; 6769 PC pc_temp; 6770 PCType coarse_pc_type; 6771 KSPType coarse_ksp_type; 6772 PetscBool multilevel_requested,multilevel_allowed; 6773 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6774 Mat t_coarse_mat_is; 6775 PetscInt ncoarse; 6776 PetscBool compute_vecs = PETSC_FALSE; 6777 PetscScalar *array; 6778 MatReuse coarse_mat_reuse; 6779 PetscBool restr, full_restr, have_void; 6780 PetscErrorCode ierr; 6781 6782 PetscFunctionBegin; 6783 /* Assign global numbering to coarse dofs */ 6784 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6785 PetscInt ocoarse_size; 6786 compute_vecs = PETSC_TRUE; 6787 ocoarse_size = pcbddc->coarse_size; 6788 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6789 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6790 /* see if we can avoid some work */ 6791 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6792 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6793 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6794 PC pc; 6795 PetscBool isbddc; 6796 6797 /* temporary workaround since PCBDDC does not have a reset method so far */ 6798 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6799 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6800 if (isbddc) { 6801 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6802 } else { 6803 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6804 } 6805 coarse_reuse = PETSC_FALSE; 6806 } else { /* we can safely reuse already computed coarse matrix */ 6807 coarse_reuse = PETSC_TRUE; 6808 } 6809 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6810 coarse_reuse = PETSC_FALSE; 6811 } 6812 /* reset any subassembling information */ 6813 if (!coarse_reuse || pcbddc->recompute_topography) { 6814 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6815 } 6816 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6817 coarse_reuse = PETSC_TRUE; 6818 } 6819 /* assemble coarse matrix */ 6820 if (coarse_reuse && pcbddc->coarse_ksp) { 6821 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6822 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6823 coarse_mat_reuse = MAT_REUSE_MATRIX; 6824 } else { 6825 coarse_mat = NULL; 6826 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6827 } 6828 6829 /* creates temporary l2gmap and IS for coarse indexes */ 6830 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6831 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6832 6833 /* creates temporary MATIS object for coarse matrix */ 6834 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6835 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6836 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6837 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6838 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6839 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6840 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6841 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6842 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6843 6844 /* count "active" (i.e. with positive local size) and "void" processes */ 6845 im_active = !!(pcis->n); 6846 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6847 6848 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6849 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6850 /* full_restr : just use the receivers from the subassembling pattern */ 6851 coarse_mat_is = NULL; 6852 multilevel_allowed = PETSC_FALSE; 6853 multilevel_requested = PETSC_FALSE; 6854 full_restr = PETSC_TRUE; 6855 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6856 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6857 if (multilevel_requested) { 6858 ncoarse = active_procs/pcbddc->coarsening_ratio; 6859 restr = PETSC_FALSE; 6860 full_restr = PETSC_FALSE; 6861 } else { 6862 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6863 restr = PETSC_TRUE; 6864 full_restr = PETSC_TRUE; 6865 } 6866 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6867 ncoarse = PetscMax(1,ncoarse); 6868 if (!pcbddc->coarse_subassembling) { 6869 if (pcbddc->coarsening_ratio > 1) { 6870 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6871 } else { 6872 PetscMPIInt size,rank; 6873 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6874 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6875 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6876 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6877 } 6878 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6879 PetscInt psum; 6880 PetscMPIInt size; 6881 if (pcbddc->coarse_ksp) psum = 1; 6882 else psum = 0; 6883 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6884 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6885 if (ncoarse < size) have_void = PETSC_TRUE; 6886 } 6887 /* determine if we can go multilevel */ 6888 if (multilevel_requested) { 6889 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6890 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6891 } 6892 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6893 6894 /* dump subassembling pattern */ 6895 if (pcbddc->dbg_flag && multilevel_allowed) { 6896 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6897 } 6898 6899 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6900 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6901 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6902 const PetscInt *idxs; 6903 ISLocalToGlobalMapping tmap; 6904 6905 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6906 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6907 /* allocate space for temporary storage */ 6908 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6909 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6910 /* allocate for IS array */ 6911 nisdofs = pcbddc->n_ISForDofsLocal; 6912 nisneu = !!pcbddc->NeumannBoundariesLocal; 6913 nisvert = 0; /* nisvert is not used */ 6914 nis = nisdofs + nisneu + nisvert; 6915 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6916 /* dofs splitting */ 6917 for (i=0;i<nisdofs;i++) { 6918 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6919 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6920 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6921 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6922 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6923 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6924 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6925 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6926 } 6927 /* neumann boundaries */ 6928 if (pcbddc->NeumannBoundariesLocal) { 6929 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6930 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6931 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6932 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6933 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6934 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6935 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6936 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6937 } 6938 /* free memory */ 6939 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6940 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6941 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6942 } else { 6943 nis = 0; 6944 nisdofs = 0; 6945 nisneu = 0; 6946 nisvert = 0; 6947 isarray = NULL; 6948 } 6949 /* destroy no longer needed map */ 6950 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6951 6952 /* subassemble */ 6953 if (multilevel_allowed) { 6954 Vec vp[1]; 6955 PetscInt nvecs = 0; 6956 PetscBool reuse,reuser; 6957 6958 if (coarse_mat) reuse = PETSC_TRUE; 6959 else reuse = PETSC_FALSE; 6960 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6961 vp[0] = NULL; 6962 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6963 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6964 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6965 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6966 nvecs = 1; 6967 6968 if (pcbddc->divudotp) { 6969 Mat B,loc_divudotp; 6970 Vec v,p; 6971 IS dummy; 6972 PetscInt np; 6973 6974 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6975 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6976 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6977 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6978 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6979 ierr = VecSet(p,1.);CHKERRQ(ierr); 6980 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6981 ierr = VecDestroy(&p);CHKERRQ(ierr); 6982 ierr = MatDestroy(&B);CHKERRQ(ierr); 6983 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6984 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6985 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6986 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6987 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6988 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6989 ierr = VecDestroy(&v);CHKERRQ(ierr); 6990 } 6991 } 6992 if (reuser) { 6993 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6994 } else { 6995 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6996 } 6997 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6998 PetscScalar *arraym,*arrayv; 6999 PetscInt nl; 7000 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7001 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7002 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7003 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7004 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7005 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7006 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7007 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7008 } else { 7009 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7010 } 7011 } else { 7012 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 7013 } 7014 if (coarse_mat_is || coarse_mat) { 7015 PetscMPIInt size; 7016 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 7017 if (!multilevel_allowed) { 7018 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7019 } else { 7020 Mat A; 7021 7022 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7023 if (coarse_mat_is) { 7024 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7025 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7026 coarse_mat = coarse_mat_is; 7027 } 7028 /* be sure we don't have MatSeqDENSE as local mat */ 7029 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7030 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7031 } 7032 } 7033 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7034 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7035 7036 /* create local to global scatters for coarse problem */ 7037 if (compute_vecs) { 7038 PetscInt lrows; 7039 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7040 if (coarse_mat) { 7041 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7042 } else { 7043 lrows = 0; 7044 } 7045 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7046 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7047 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7048 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7049 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7050 } 7051 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7052 7053 /* set defaults for coarse KSP and PC */ 7054 if (multilevel_allowed) { 7055 coarse_ksp_type = KSPRICHARDSON; 7056 coarse_pc_type = PCBDDC; 7057 } else { 7058 coarse_ksp_type = KSPPREONLY; 7059 coarse_pc_type = PCREDUNDANT; 7060 } 7061 7062 /* print some info if requested */ 7063 if (pcbddc->dbg_flag) { 7064 if (!multilevel_allowed) { 7065 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7066 if (multilevel_requested) { 7067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7068 } else if (pcbddc->max_levels) { 7069 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7070 } 7071 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7072 } 7073 } 7074 7075 /* create the coarse KSP object only once with defaults */ 7076 if (coarse_mat) { 7077 PetscViewer dbg_viewer = NULL; 7078 if (pcbddc->dbg_flag) { 7079 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7080 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7081 } 7082 if (!pcbddc->coarse_ksp) { 7083 char prefix[256],str_level[16]; 7084 size_t len; 7085 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7086 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7087 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7088 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7089 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7090 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7091 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7092 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7093 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7094 /* prefix */ 7095 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7096 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7097 if (!pcbddc->current_level) { 7098 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7099 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7100 } else { 7101 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7102 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7103 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7104 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7105 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7106 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7107 } 7108 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7109 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7110 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7111 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7112 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7113 /* allow user customization */ 7114 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7115 } 7116 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7117 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7118 if (nisdofs) { 7119 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7120 for (i=0;i<nisdofs;i++) { 7121 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7122 } 7123 } 7124 if (nisneu) { 7125 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7126 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7127 } 7128 if (nisvert) { 7129 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7130 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7131 } 7132 7133 /* get some info after set from options */ 7134 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7135 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7136 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7137 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7138 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7139 isbddc = PETSC_FALSE; 7140 } 7141 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7142 if (isredundant) { 7143 KSP inner_ksp; 7144 PC inner_pc; 7145 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7146 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7147 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7148 } 7149 7150 /* parameters which miss an API */ 7151 if (isbddc) { 7152 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7153 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7154 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7155 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7156 if (pcbddc_coarse->benign_saddle_point) { 7157 Mat coarsedivudotp_is; 7158 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7159 IS row,col; 7160 const PetscInt *gidxs; 7161 PetscInt n,st,M,N; 7162 7163 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7164 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7165 st = st-n; 7166 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7167 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7168 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7169 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7170 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7171 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7172 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7173 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7174 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7175 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7176 ierr = ISDestroy(&row);CHKERRQ(ierr); 7177 ierr = ISDestroy(&col);CHKERRQ(ierr); 7178 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7179 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7180 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7181 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7182 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7183 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7184 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7185 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7186 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7187 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7188 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7189 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7190 } 7191 } 7192 7193 /* propagate symmetry info of coarse matrix */ 7194 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7195 if (pc->pmat->symmetric_set) { 7196 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7197 } 7198 if (pc->pmat->hermitian_set) { 7199 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7200 } 7201 if (pc->pmat->spd_set) { 7202 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7203 } 7204 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7205 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7206 } 7207 /* set operators */ 7208 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7209 if (pcbddc->dbg_flag) { 7210 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7211 } 7212 } 7213 ierr = PetscFree(isarray);CHKERRQ(ierr); 7214 #if 0 7215 { 7216 PetscViewer viewer; 7217 char filename[256]; 7218 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7219 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7220 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7221 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7222 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7223 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7224 } 7225 #endif 7226 7227 if (pcbddc->coarse_ksp) { 7228 Vec crhs,csol; 7229 7230 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7231 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7232 if (!csol) { 7233 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7234 } 7235 if (!crhs) { 7236 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7237 } 7238 } 7239 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7240 7241 /* compute null space for coarse solver if the benign trick has been requested */ 7242 if (pcbddc->benign_null) { 7243 7244 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7245 for (i=0;i<pcbddc->benign_n;i++) { 7246 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7247 } 7248 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7249 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7250 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7251 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7252 if (coarse_mat) { 7253 Vec nullv; 7254 PetscScalar *array,*array2; 7255 PetscInt nl; 7256 7257 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7258 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7259 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7260 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7261 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7262 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7263 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7264 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7265 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7266 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7267 } 7268 } 7269 7270 if (pcbddc->coarse_ksp) { 7271 PetscBool ispreonly; 7272 7273 if (CoarseNullSpace) { 7274 PetscBool isnull; 7275 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7276 if (isnull) { 7277 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7278 } 7279 /* TODO: add local nullspaces (if any) */ 7280 } 7281 /* setup coarse ksp */ 7282 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7283 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7284 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7285 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7286 KSP check_ksp; 7287 KSPType check_ksp_type; 7288 PC check_pc; 7289 Vec check_vec,coarse_vec; 7290 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7291 PetscInt its; 7292 PetscBool compute_eigs; 7293 PetscReal *eigs_r,*eigs_c; 7294 PetscInt neigs; 7295 const char *prefix; 7296 7297 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7298 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7299 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7300 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7301 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7302 /* prevent from setup unneeded object */ 7303 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7304 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7305 if (ispreonly) { 7306 check_ksp_type = KSPPREONLY; 7307 compute_eigs = PETSC_FALSE; 7308 } else { 7309 check_ksp_type = KSPGMRES; 7310 compute_eigs = PETSC_TRUE; 7311 } 7312 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7313 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7314 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7315 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7316 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7317 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7318 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7319 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7320 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7321 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7322 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7323 /* create random vec */ 7324 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7325 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7326 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7327 /* solve coarse problem */ 7328 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7329 /* set eigenvalue estimation if preonly has not been requested */ 7330 if (compute_eigs) { 7331 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7332 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7333 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7334 if (neigs) { 7335 lambda_max = eigs_r[neigs-1]; 7336 lambda_min = eigs_r[0]; 7337 if (pcbddc->use_coarse_estimates) { 7338 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7339 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7340 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7341 } 7342 } 7343 } 7344 } 7345 7346 /* check coarse problem residual error */ 7347 if (pcbddc->dbg_flag) { 7348 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7349 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7350 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7351 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7352 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7353 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7354 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7355 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7356 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7357 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7358 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7359 if (CoarseNullSpace) { 7360 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7361 } 7362 if (compute_eigs) { 7363 PetscReal lambda_max_s,lambda_min_s; 7364 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7365 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7366 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7367 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7368 for (i=0;i<neigs;i++) { 7369 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7370 } 7371 } 7372 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7373 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7374 } 7375 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7376 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7377 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7378 if (compute_eigs) { 7379 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7380 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7381 } 7382 } 7383 } 7384 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7385 /* print additional info */ 7386 if (pcbddc->dbg_flag) { 7387 /* waits until all processes reaches this point */ 7388 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7389 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7390 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7391 } 7392 7393 /* free memory */ 7394 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7395 PetscFunctionReturn(0); 7396 } 7397 7398 #undef __FUNCT__ 7399 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7400 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7401 { 7402 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7403 PC_IS* pcis = (PC_IS*)pc->data; 7404 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7405 IS subset,subset_mult,subset_n; 7406 PetscInt local_size,coarse_size=0; 7407 PetscInt *local_primal_indices=NULL; 7408 const PetscInt *t_local_primal_indices; 7409 PetscErrorCode ierr; 7410 7411 PetscFunctionBegin; 7412 /* Compute global number of coarse dofs */ 7413 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7414 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7415 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7416 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7417 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7418 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7419 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7420 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7421 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7422 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7423 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7424 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7425 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7426 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7427 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7428 7429 /* check numbering */ 7430 if (pcbddc->dbg_flag) { 7431 PetscScalar coarsesum,*array,*array2; 7432 PetscInt i; 7433 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7434 7435 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7436 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7437 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7438 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7439 /* counter */ 7440 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7441 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7442 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7443 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7444 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7445 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7446 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7447 for (i=0;i<pcbddc->local_primal_size;i++) { 7448 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7449 } 7450 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7451 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7452 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7453 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7454 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7455 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7456 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7457 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7458 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7459 for (i=0;i<pcis->n;i++) { 7460 if (array[i] != 0.0 && array[i] != array2[i]) { 7461 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7462 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7463 set_error = PETSC_TRUE; 7464 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7465 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7466 } 7467 } 7468 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7469 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7470 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7471 for (i=0;i<pcis->n;i++) { 7472 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7473 } 7474 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7475 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7476 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7477 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7478 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7479 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7480 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7481 PetscInt *gidxs; 7482 7483 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7484 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7485 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7488 for (i=0;i<pcbddc->local_primal_size;i++) { 7489 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7490 } 7491 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7492 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7493 } 7494 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7495 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7496 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7497 } 7498 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7499 /* get back data */ 7500 *coarse_size_n = coarse_size; 7501 *local_primal_indices_n = local_primal_indices; 7502 PetscFunctionReturn(0); 7503 } 7504 7505 #undef __FUNCT__ 7506 #define __FUNCT__ "PCBDDCGlobalToLocal" 7507 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7508 { 7509 IS localis_t; 7510 PetscInt i,lsize,*idxs,n; 7511 PetscScalar *vals; 7512 PetscErrorCode ierr; 7513 7514 PetscFunctionBegin; 7515 /* get indices in local ordering exploiting local to global map */ 7516 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7517 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7518 for (i=0;i<lsize;i++) vals[i] = 1.0; 7519 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7520 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7521 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7522 if (idxs) { /* multilevel guard */ 7523 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7524 } 7525 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7526 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7527 ierr = PetscFree(vals);CHKERRQ(ierr); 7528 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7529 /* now compute set in local ordering */ 7530 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7531 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7532 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7533 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7534 for (i=0,lsize=0;i<n;i++) { 7535 if (PetscRealPart(vals[i]) > 0.5) { 7536 lsize++; 7537 } 7538 } 7539 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7540 for (i=0,lsize=0;i<n;i++) { 7541 if (PetscRealPart(vals[i]) > 0.5) { 7542 idxs[lsize++] = i; 7543 } 7544 } 7545 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7546 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7547 *localis = localis_t; 7548 PetscFunctionReturn(0); 7549 } 7550 7551 #undef __FUNCT__ 7552 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7553 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7554 { 7555 PC_IS *pcis=(PC_IS*)pc->data; 7556 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7557 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7558 Mat S_j; 7559 PetscInt *used_xadj,*used_adjncy; 7560 PetscBool free_used_adj; 7561 PetscErrorCode ierr; 7562 7563 PetscFunctionBegin; 7564 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7565 free_used_adj = PETSC_FALSE; 7566 if (pcbddc->sub_schurs_layers == -1) { 7567 used_xadj = NULL; 7568 used_adjncy = NULL; 7569 } else { 7570 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7571 used_xadj = pcbddc->mat_graph->xadj; 7572 used_adjncy = pcbddc->mat_graph->adjncy; 7573 } else if (pcbddc->computed_rowadj) { 7574 used_xadj = pcbddc->mat_graph->xadj; 7575 used_adjncy = pcbddc->mat_graph->adjncy; 7576 } else { 7577 PetscBool flg_row=PETSC_FALSE; 7578 const PetscInt *xadj,*adjncy; 7579 PetscInt nvtxs; 7580 7581 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7582 if (flg_row) { 7583 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7584 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7585 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7586 free_used_adj = PETSC_TRUE; 7587 } else { 7588 pcbddc->sub_schurs_layers = -1; 7589 used_xadj = NULL; 7590 used_adjncy = NULL; 7591 } 7592 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7593 } 7594 } 7595 7596 /* setup sub_schurs data */ 7597 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7598 if (!sub_schurs->schur_explicit) { 7599 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7600 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7601 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7602 } else { 7603 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7604 PetscBool isseqaij,need_change = PETSC_FALSE; 7605 PetscInt benign_n; 7606 Mat change = NULL; 7607 Vec scaling = NULL; 7608 IS change_primal = NULL; 7609 7610 if (!pcbddc->use_vertices && reuse_solvers) { 7611 PetscInt n_vertices; 7612 7613 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7614 reuse_solvers = (PetscBool)!n_vertices; 7615 } 7616 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7617 if (!isseqaij) { 7618 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7619 if (matis->A == pcbddc->local_mat) { 7620 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7621 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7622 } else { 7623 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7624 } 7625 } 7626 if (!pcbddc->benign_change_explicit) { 7627 benign_n = pcbddc->benign_n; 7628 } else { 7629 benign_n = 0; 7630 } 7631 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7632 We need a global reduction to avoid possible deadlocks. 7633 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7634 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7635 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7636 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7637 need_change = (PetscBool)(!need_change); 7638 } 7639 /* If the user defines additional constraints, we import them here. 7640 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7641 if (need_change) { 7642 PC_IS *pcisf; 7643 PC_BDDC *pcbddcf; 7644 PC pcf; 7645 7646 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7647 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7648 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7649 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7650 /* hacks */ 7651 pcisf = (PC_IS*)pcf->data; 7652 pcisf->is_B_local = pcis->is_B_local; 7653 pcisf->vec1_N = pcis->vec1_N; 7654 pcisf->BtoNmap = pcis->BtoNmap; 7655 pcisf->n = pcis->n; 7656 pcisf->n_B = pcis->n_B; 7657 pcbddcf = (PC_BDDC*)pcf->data; 7658 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7659 pcbddcf->mat_graph = pcbddc->mat_graph; 7660 pcbddcf->use_faces = PETSC_TRUE; 7661 pcbddcf->use_change_of_basis = PETSC_TRUE; 7662 pcbddcf->use_change_on_faces = PETSC_TRUE; 7663 pcbddcf->use_qr_single = PETSC_TRUE; 7664 pcbddcf->fake_change = PETSC_TRUE; 7665 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7666 /* store information on primal vertices and change of basis (in local numbering) */ 7667 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7668 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7669 change = pcbddcf->ConstraintMatrix; 7670 pcbddcf->ConstraintMatrix = NULL; 7671 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7672 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7673 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7674 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7675 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7676 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7677 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7678 pcf->ops->destroy = NULL; 7679 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7680 } 7681 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7682 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7683 ierr = MatDestroy(&change);CHKERRQ(ierr); 7684 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7685 } 7686 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7687 7688 /* free adjacency */ 7689 if (free_used_adj) { 7690 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7691 } 7692 PetscFunctionReturn(0); 7693 } 7694 7695 #undef __FUNCT__ 7696 #define __FUNCT__ "PCBDDCInitSubSchurs" 7697 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7698 { 7699 PC_IS *pcis=(PC_IS*)pc->data; 7700 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7701 PCBDDCGraph graph; 7702 PetscErrorCode ierr; 7703 7704 PetscFunctionBegin; 7705 /* attach interface graph for determining subsets */ 7706 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7707 IS verticesIS,verticescomm; 7708 PetscInt vsize,*idxs; 7709 7710 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7711 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7712 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7713 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7714 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7715 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7716 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7717 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7718 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7719 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7720 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7721 } else { 7722 graph = pcbddc->mat_graph; 7723 } 7724 /* print some info */ 7725 if (pcbddc->dbg_flag) { 7726 IS vertices; 7727 PetscInt nv,nedges,nfaces; 7728 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7729 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7730 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7731 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7732 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7733 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7734 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7735 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7736 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7737 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7738 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7739 } 7740 7741 /* sub_schurs init */ 7742 if (!pcbddc->sub_schurs) { 7743 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7744 } 7745 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7746 7747 /* free graph struct */ 7748 if (pcbddc->sub_schurs_rebuild) { 7749 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7750 } 7751 PetscFunctionReturn(0); 7752 } 7753 7754 #undef __FUNCT__ 7755 #define __FUNCT__ "PCBDDCCheckOperator" 7756 PetscErrorCode PCBDDCCheckOperator(PC pc) 7757 { 7758 PC_IS *pcis=(PC_IS*)pc->data; 7759 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7760 PetscErrorCode ierr; 7761 7762 PetscFunctionBegin; 7763 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7764 IS zerodiag = NULL; 7765 Mat S_j,B0_B=NULL; 7766 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7767 PetscScalar *p0_check,*array,*array2; 7768 PetscReal norm; 7769 PetscInt i; 7770 7771 /* B0 and B0_B */ 7772 if (zerodiag) { 7773 IS dummy; 7774 7775 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7776 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7777 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7778 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7779 } 7780 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7781 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7782 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7783 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7784 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7785 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7786 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7787 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7788 /* S_j */ 7789 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7790 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7791 7792 /* mimic vector in \widetilde{W}_\Gamma */ 7793 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7794 /* continuous in primal space */ 7795 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7796 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7797 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7798 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7799 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7800 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7801 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7802 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7803 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7804 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7805 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7806 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7807 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7808 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7809 7810 /* assemble rhs for coarse problem */ 7811 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7812 /* local with Schur */ 7813 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7814 if (zerodiag) { 7815 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7816 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7817 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7818 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7819 } 7820 /* sum on primal nodes the local contributions */ 7821 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7822 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7823 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7824 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7825 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7826 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7827 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7828 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7829 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7830 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7831 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7832 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7833 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7834 /* scale primal nodes (BDDC sums contibutions) */ 7835 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7836 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7837 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7838 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7839 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7840 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7841 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7842 /* global: \widetilde{B0}_B w_\Gamma */ 7843 if (zerodiag) { 7844 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7845 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7846 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7847 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7848 } 7849 /* BDDC */ 7850 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7851 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7852 7853 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7854 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7855 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7856 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7857 for (i=0;i<pcbddc->benign_n;i++) { 7858 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7859 } 7860 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7861 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7862 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7863 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7864 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7865 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7866 } 7867 PetscFunctionReturn(0); 7868 } 7869