1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 if (!esize) PetscFunctionReturn(0); 84 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 85 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 86 87 /* gradients */ 88 ptr = work + 5*esize; 89 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 90 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 91 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 92 ierr = MatDestroy(&GE);CHKERRQ(ierr); 93 94 /* constants */ 95 ptr += rsize*csize; 96 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 97 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 99 ierr = MatDestroy(&GE);CHKERRQ(ierr); 100 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 101 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PCBDDCNedelecSupport" 107 PetscErrorCode PCBDDCNedelecSupport(PC pc) 108 { 109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 110 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 111 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 112 MatNullSpace nnsp; 113 Vec tvec,*quads; 114 PetscSF sfv; 115 ISLocalToGlobalMapping el2g,vl2g,fl2g; 116 MPI_Comm comm; 117 IS lned,primals,allprimals,nedfieldlocal; 118 IS *eedges,*extrows,*extcols,*alleedges; 119 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee; 120 PetscScalar *vals,*work; 121 PetscReal *rwork; 122 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 123 PetscInt ne,nv,Lv,order,n,field; 124 PetscInt n_neigh,*neigh,*n_shared,**shared; 125 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscBool print,eerr,done,lrc[2],conforming; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* test variable order code and print debug info TODO: to be removed */ 133 print = PETSC_FALSE; 134 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 135 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 136 137 /* Return to caller if there are no edges in the decomposition */ 138 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 139 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 140 ierr = ISLocalToGlobalMappingGetSize(el2g,&n);CHKERRQ(ierr); 141 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 142 lrc[0] = PETSC_FALSE; 143 for (i=0;i<n;i++) { 144 if (PetscRealPart(vals[i]) > 2.) { 145 lrc[0] = PETSC_TRUE; 146 break; 147 } 148 } 149 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 150 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 151 if (!lrc[1]) PetscFunctionReturn(0); 152 153 /* Get discrete gradient 154 If it is defined for a subset of dofs, assumes G is given in global ordering for all the dofs */ 155 G = pcbddc->discretegradient; 156 order = pcbddc->nedorder; 157 conforming = pcbddc->conforming; 158 field = pcbddc->nedfield; 159 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 160 if (pcbddc->n_ISForDofsLocal && field > -1) { 161 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 162 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 163 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 164 } else { 165 PetscBool testnedfield = PETSC_FALSE; 166 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 167 if (!testnedfield) { 168 ne = n; 169 nedfieldlocal = NULL; 170 } else { 171 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 172 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 173 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 174 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 175 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 176 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 177 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 178 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 179 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 180 for (i=0,cum=0;i<n;i++) { 181 if (matis->sf_leafdata[i] > 1) { 182 matis->sf_leafdata[cum++] = i; 183 } 184 } 185 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 186 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 187 } 188 } 189 190 if (nedfieldlocal) { /* merge with previous code when testing is done */ 191 IS is; 192 193 /* need to map from the local Nedelec field to local numbering */ 194 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 195 /* need to map from the local Nedelec field to global numbering */ 196 ierr = ISLocalToGlobalMappingApplyIS(el2g,nedfieldlocal,&is);CHKERRQ(ierr); 197 ierr = ISLocalToGlobalMappingCreateIS(is,&el2g);CHKERRQ(ierr); 198 ierr = ISDestroy(&is);CHKERRQ(ierr); 199 } else { 200 ierr = PetscObjectReference((PetscObject)el2g);CHKERRQ(ierr); 201 fl2g = NULL; 202 } 203 204 /* Sanity checks */ 205 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 206 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 207 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 208 209 /* Drop connections for interior edges (this modifies G) */ 210 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 211 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 212 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 213 if (nedfieldlocal) { 214 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 215 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 216 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 217 } else { 218 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 219 } 220 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 221 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 222 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 223 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 224 if (matis->sf_rootdata[i] < 2) { 225 matis->sf_rootdata[cum++] = i + rst; 226 } 227 } 228 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 229 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 230 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 231 232 /* Extract subdomain relevant rows of G */ 233 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 234 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 235 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 236 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 237 ierr = ISDestroy(&lned);CHKERRQ(ierr); 238 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 239 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 240 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 241 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 242 if (print) { 243 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 244 ierr = MatView(lG,NULL);CHKERRQ(ierr); 245 } 246 247 /* SF for nodal communications */ 248 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 249 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 250 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 251 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 252 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 253 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 254 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 257 258 /* Destroy temporary G created in MATIS format */ 259 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 260 261 /* Save lG */ 262 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 263 264 /* Analyze the edge-nodes connections (duplicate lG) */ 265 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 266 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 267 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 268 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 269 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 270 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 271 /* need to import the boundary specification to ensure the 272 proper detection of coarse edges' endpoints */ 273 if (pcbddc->DirichletBoundariesLocal) { 274 IS is; 275 276 if (fl2g) { 277 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 278 } else { 279 is = pcbddc->DirichletBoundariesLocal; 280 } 281 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 282 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 283 for (i=0;i<cum;i++) { 284 if (idxs[i] >= 0) { 285 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 286 } 287 } 288 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 289 if (fl2g) { 290 ierr = ISDestroy(&is);CHKERRQ(ierr); 291 } 292 } 293 if (pcbddc->NeumannBoundariesLocal) { 294 IS is; 295 296 if (fl2g) { 297 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 298 } else { 299 is = pcbddc->NeumannBoundariesLocal; 300 } 301 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 302 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 303 for (i=0;i<cum;i++) { 304 if (idxs[i] >= 0) { 305 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 306 } 307 } 308 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 309 if (fl2g) { 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } 312 } 313 314 /* need to remove coarse faces' dofs to ensure the 315 proper detection of coarse edges' endpoints */ 316 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 317 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 318 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 319 for (i=1;i<n_neigh;i++) 320 for (j=0;j<n_shared[i];j++) 321 marks[shared[i][j]]++; 322 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) { 324 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 325 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 326 } 327 } 328 329 if (!conforming) { 330 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 331 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 332 } 333 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 334 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 335 cum = 0; 336 for (i=0;i<ne;i++) { 337 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 338 if (!PetscBTLookup(btee,i)) { 339 marks[cum++] = i; 340 continue; 341 } 342 /* set badly connected edge dofs as primal */ 343 if (!conforming) { 344 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 345 marks[cum++] = i; 346 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 347 for (j=ii[i];j<ii[i+1];j++) { 348 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 349 } 350 } else { 351 /* every edge dofs should be connected trough a certain number of nodal dofs 352 to other edge dofs belonging to coarse edges 353 - at most 2 endpoints 354 - order-1 interior nodal dofs 355 - no undefined nodal dofs (nconn < order) 356 */ 357 PetscInt ends = 0,ints = 0, undef = 0; 358 for (j=ii[i];j<ii[i+1];j++) { 359 PetscInt v = jj[j],k; 360 PetscInt nconn = iit[v+1]-iit[v]; 361 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 362 if (nconn > order) ends++; 363 else if (nconn == order) ints++; 364 else undef++; 365 } 366 if (undef || ends > 2 || ints != order -1) { 367 marks[cum++] = i; 368 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 369 for (j=ii[i];j<ii[i+1];j++) { 370 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 371 } 372 } 373 } 374 } 375 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 376 if (!order && ii[i+1] != ii[i]) { 377 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 378 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 379 } 380 } 381 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 382 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 383 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 384 if (!conforming) { 385 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 386 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 387 } 388 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 389 /* identify splitpoints and corner candidates: TODO variable order */ 390 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 391 if (print) { 392 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 393 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 394 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 395 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 396 } 397 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 398 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 399 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 400 for (i=0;i<nv;i++) { 401 PetscInt ord = order, test = ii[i+1]-ii[i]; 402 if (!order) { 403 PetscReal vorder = 0.; 404 405 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 406 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 407 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 408 ord = 1; 409 } 410 #if defined(PETSC_USE_DEBUG) 411 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 412 #endif 413 if (test >= 3*ord) { /* splitpoints */ 414 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 415 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 416 } else if (test == ord) { 417 if (order == 1) { 418 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 419 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 420 } else { 421 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 422 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 423 } 424 } 425 } 426 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 427 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 428 429 /* Get the local G^T explicitly */ 430 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 431 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 432 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 433 434 /* Mark interior nodal dofs */ 435 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 436 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 437 for (i=1;i<n_neigh;i++) { 438 for (j=0;j<n_shared[i];j++) { 439 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 440 } 441 } 442 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 443 444 /* communicate corners and splitpoints */ 445 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 446 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 447 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 448 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 449 450 if (print) { 451 IS tbz; 452 453 cum = 0; 454 for (i=0;i<nv;i++) 455 if (sfvleaves[i]) 456 vmarks[cum++] = i; 457 458 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 459 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 460 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 461 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 462 } 463 464 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 465 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 466 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 467 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 468 469 /* Zero rows of lGt corresponding to identified corners 470 and interior nodal dofs */ 471 cum = 0; 472 for (i=0;i<nv;i++) { 473 if (sfvleaves[i]) { 474 vmarks[cum++] = i; 475 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 476 } 477 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 478 } 479 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 480 if (print) { 481 IS tbz; 482 483 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 484 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 485 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 486 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 487 } 488 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 489 ierr = PetscFree(vmarks);CHKERRQ(ierr); 490 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 491 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 492 493 /* Recompute G */ 494 ierr = MatDestroy(&lG);CHKERRQ(ierr); 495 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 496 if (print) { 497 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 498 ierr = MatView(lG,NULL);CHKERRQ(ierr); 499 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 500 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 501 } 502 503 /* Get primal dofs (if any) */ 504 cum = 0; 505 for (i=0;i<ne;i++) { 506 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 507 } 508 if (fl2g) { 509 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 510 } 511 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 512 if (print) { 513 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 514 ierr = ISView(primals,NULL);CHKERRQ(ierr); 515 } 516 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 517 /* TODO: what if the user passed in some of them ? */ 518 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 519 ierr = ISDestroy(&primals);CHKERRQ(ierr); 520 521 /* Compute edge connectivity */ 522 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 523 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 524 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 525 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 526 if (fl2g) { 527 PetscBT btf; 528 PetscInt *iia,*jja,*iiu,*jju; 529 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 530 531 /* create CSR for all local dofs */ 532 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 533 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 534 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 535 iiu = pcbddc->mat_graph->xadj; 536 jju = pcbddc->mat_graph->adjncy; 537 } else if (pcbddc->use_local_adj) { 538 rest = PETSC_TRUE; 539 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 540 } else { 541 free = PETSC_TRUE; 542 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 543 iiu[0] = 0; 544 for (i=0;i<n;i++) { 545 iiu[i+1] = i+1; 546 jju[i] = -1; 547 } 548 } 549 550 /* import sizes of CSR */ 551 iia[0] = 0; 552 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 553 554 /* overwrite entries corresponding to the Nedelec field */ 555 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 556 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 557 for (i=0;i<ne;i++) { 558 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 559 iia[idxs[i]+1] = ii[i+1]-ii[i]; 560 } 561 562 /* iia in CSR */ 563 for (i=0;i<n;i++) iia[i+1] += iia[i]; 564 565 /* jja in CSR */ 566 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 567 for (i=0;i<n;i++) 568 if (!PetscBTLookup(btf,i)) 569 for (j=0;j<iiu[i+1]-iiu[i];j++) 570 jja[iia[i]+j] = jju[iiu[i]+j]; 571 572 /* map edge dofs connectivity */ 573 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 574 for (i=0;i<ne;i++) { 575 PetscInt e = idxs[i]; 576 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 577 } 578 579 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 580 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 581 if (rest) { 582 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 583 } 584 if (free) { 585 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 586 } 587 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 588 } else { 589 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 590 } 591 } 592 593 /* Analyze interface for edge dofs */ 594 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 595 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 596 597 /* Get coarse edges in the edge space */ 598 pcbddc->mat_graph->twodim = PETSC_FALSE; 599 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 600 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 601 602 if (fl2g) { 603 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 604 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 605 for (i=0;i<nee;i++) { 606 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 607 } 608 } else { 609 eedges = alleedges; 610 primals = allprimals; 611 } 612 613 /* Mark fine edge dofs with their coarse edge id */ 614 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 615 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 616 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 617 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 618 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 619 if (print) { 620 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 621 ierr = ISView(primals,NULL);CHKERRQ(ierr); 622 } 623 624 maxsize = 0; 625 for (i=0;i<nee;i++) { 626 PetscInt size,mark = i+1; 627 628 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 629 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 630 for (j=0;j<size;j++) marks[idxs[j]] = mark; 631 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 632 maxsize = PetscMax(maxsize,size); 633 } 634 635 /* Find coarse edge endpoints */ 636 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 637 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 638 for (i=0;i<nee;i++) { 639 PetscInt mark = i+1,size; 640 641 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 642 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 643 if (print) { 644 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 645 ISView(eedges[i],NULL); 646 } 647 for (j=0;j<size;j++) { 648 PetscInt k, ee = idxs[j]; 649 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 650 for (k=ii[ee];k<ii[ee+1];k++) { 651 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 652 if (PetscBTLookup(btv,jj[k])) { 653 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 654 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 655 if (print) PetscPrintf(PETSC_COMM_SELF," candidate? %d\n",(PetscBool)(PetscBTLookup(btvcand,jj[k]))); 656 PetscInt k2; 657 PetscBool corner = PETSC_FALSE; 658 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 659 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 660 /* it's a corner if either is connected with an edge dof belonging to a different cc or 661 if the edge dof lie on the natural part of the boundary */ 662 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 663 corner = PETSC_TRUE; 664 break; 665 } 666 } 667 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 668 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 669 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 670 } else { 671 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 672 } 673 } 674 } 675 } 676 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 677 } 678 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 679 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 680 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 681 682 /* Reset marked primal dofs */ 683 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 684 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 685 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 686 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 687 688 /* Now use the initial lG */ 689 ierr = MatDestroy(&lG);CHKERRQ(ierr); 690 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 691 lG = lGinit; 692 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 693 694 /* Compute extended cols indices */ 695 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 696 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 697 i *= maxsize; 698 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 699 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 700 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 701 eerr = PETSC_FALSE; 702 for (i=0;i<nee;i++) { 703 PetscInt size; 704 705 cum = 0; 706 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 707 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 708 for (j=0;j<size;j++) { 709 PetscInt k,ee = idxs[j]; 710 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 711 } 712 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 713 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 714 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 715 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 716 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 717 /* it may happen that endpoints are not defined at this point 718 if it is the case, mark this edge for a second pass */ 719 if (cum != size -1) { 720 emarks[i] = 1; 721 if (print) { 722 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 723 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 724 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 725 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 726 } 727 eerr = PETSC_TRUE; 728 } 729 } 730 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 731 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 732 if (done) { 733 PetscInt *newprimals; 734 735 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 736 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 737 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 738 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 739 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 740 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 741 for (i=0;i<nee;i++) { 742 if (emarks[i]) { 743 PetscInt size,mark = i+1; 744 745 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 746 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 747 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 748 for (j=0;j<size;j++) { 749 PetscInt k,ee = idxs[j]; 750 for (k=ii[ee];k<ii[ee+1];k++) { 751 /* set all candidates located on the edge as corners */ 752 if (PetscBTLookup(btvcand,jj[k])) { 753 PetscInt k2,vv = jj[k]; 754 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 755 /* set all edge dofs connected to candidate as primals */ 756 for (k2=iit[vv];k2<iit[vv+1];k2++) { 757 if (marks[jjt[k2]] == mark) { 758 PetscInt k3,ee2 = jjt[k2]; 759 newprimals[cum++] = ee2; 760 /* finally set the new corners */ 761 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 762 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 763 } 764 } 765 } 766 } 767 } 768 } 769 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 770 } 771 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 772 } 773 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 774 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 775 if (fl2g) { 776 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 777 ierr = ISDestroy(&primals);CHKERRQ(ierr); 778 for (i=0;i<nee;i++) { 779 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 780 } 781 ierr = PetscFree(eedges);CHKERRQ(ierr); 782 } 783 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 784 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 785 ierr = PetscFree(newprimals);CHKERRQ(ierr); 786 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 787 ierr = ISDestroy(&primals);CHKERRQ(ierr); 788 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 789 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 790 if (fl2g) { 791 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 792 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 793 for (i=0;i<nee;i++) { 794 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 795 } 796 } else { 797 eedges = alleedges; 798 primals = allprimals; 799 } 800 801 /* Mark again */ 802 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 803 for (i=0;i<nee;i++) { 804 PetscInt size,mark = i+1; 805 806 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 807 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 808 for (j=0;j<size;j++) marks[idxs[j]] = mark; 809 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 810 } 811 if (print) { 812 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 813 ierr = ISView(primals,NULL);CHKERRQ(ierr); 814 } 815 816 /* Recompute extended cols */ 817 eerr = PETSC_FALSE; 818 for (i=0;i<nee;i++) { 819 PetscInt size; 820 821 cum = 0; 822 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 823 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 824 for (j=0;j<size;j++) { 825 PetscInt k,ee = idxs[j]; 826 for (k=ii[ee];k<ii[ee+1];k++) { 827 if (!PetscBTLookup(btv,jj[k])) { 828 extrow[cum++] = jj[k]; 829 } 830 } 831 } 832 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 833 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 834 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 835 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 836 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 837 if (cum != size -1) { 838 if (print) { 839 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 840 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 841 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 842 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 843 } 844 eerr = PETSC_TRUE; 845 } 846 } 847 } 848 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 849 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 850 ierr = PetscFree(emarks);CHKERRQ(ierr); 851 /* an error should not occur at this point */ 852 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 853 854 /* Check the number of endpoints */ 855 /* TODO: fix case for circular edge */ 856 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 857 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 858 for (i=0;i<nee;i++) { 859 PetscInt size, found = 0; 860 861 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 862 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 863 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 864 for (j=0;j<size;j++) { 865 PetscInt k,ee = idxs[j]; 866 for (k=ii[ee];k<ii[ee+1];k++) { 867 PetscInt vv = jj[k]; 868 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 869 found++; 870 } 871 } 872 } 873 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 874 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 875 } 876 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 877 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 878 879 #if defined(PETSC_USE_DEBUG) 880 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 881 not interfere with neighbouring coarse edges */ 882 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 883 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 884 for (i=0;i<nv;i++) { 885 PetscInt emax = 0,eemax = 0; 886 887 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 888 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 889 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 890 for (j=1;j<nee+1;j++) { 891 if (emax < emarks[j]) { 892 emax = emarks[j]; 893 eemax = j; 894 } 895 } 896 /* not relevant for edges */ 897 if (!eemax) continue; 898 899 for (j=ii[i];j<ii[i+1];j++) { 900 if (marks[jj[j]] && marks[jj[j]] != eemax) { 901 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 902 } 903 } 904 } 905 ierr = PetscFree(emarks);CHKERRQ(ierr); 906 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 907 #endif 908 909 /* Compute extended rows indices for edge blocks of the change of basis */ 910 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 911 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 912 extmem *= maxsize; 913 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 914 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 915 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 916 for (i=0;i<nv;i++) { 917 PetscInt mark = 0,size,start; 918 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 919 for (j=ii[i];j<ii[i+1];j++) 920 if (marks[jj[j]] && !mark) 921 mark = marks[jj[j]]; 922 923 /* not relevant */ 924 if (!mark) continue; 925 926 /* import extended row */ 927 mark--; 928 start = mark*extmem+extrowcum[mark]; 929 size = ii[i+1]-ii[i]; 930 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 931 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 932 extrowcum[mark] += size; 933 } 934 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 935 cum = 0; 936 for (i=0;i<nee;i++) { 937 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 938 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 939 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 940 cum = PetscMax(cum,size); 941 } 942 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 943 ierr = PetscFree(marks);CHKERRQ(ierr); 944 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 945 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 946 947 /* Workspace for lapack inner calls and VecSetValues */ 948 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 949 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 950 for (i=0;i<maxsize;i++) vals[i] = 1.; 951 952 /* Create vectors for quadrature rules */ 953 /* TODO preserve other quadratures */ 954 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 955 for (i=0;i<nquads;i++) { 956 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 957 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 958 } 959 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 960 961 /* Create change of basis matrix (preallocation can be improved) */ 962 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 963 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 964 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 965 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 966 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 967 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 968 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 969 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 970 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 971 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 972 973 /* Defaults to identity */ 974 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 975 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 976 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 977 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 978 979 for (i=0;i<nee;i++) { 980 Mat Gins = NULL, GKins = NULL; 981 982 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 983 if (Gins && GKins) { 984 PetscScalar *data; 985 const PetscInt *rows,*cols; 986 PetscInt nrh,nch,nrc,ncc; 987 988 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 989 /* H1 */ 990 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 991 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 992 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 993 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 994 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 995 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 996 /* complement */ 997 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 998 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 999 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 1000 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1001 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1002 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1003 /* Gins kernel quadratures */ 1004 for (j=0;j<ncc;j++) { 1005 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1006 } 1007 /* H1 average */ 1008 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1009 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1010 } 1011 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1012 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1013 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1014 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1015 } 1016 1017 /* Start assembling */ 1018 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1019 for (i=0;i<nquads;i++) { 1020 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1021 } 1022 1023 /* Free */ 1024 if (fl2g) { 1025 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1026 for (i=0;i<nee;i++) { 1027 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1028 } 1029 ierr = PetscFree(eedges);CHKERRQ(ierr); 1030 } 1031 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1032 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1033 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1034 ierr = PetscFree(extrow);CHKERRQ(ierr); 1035 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1036 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1037 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1038 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1039 ierr = PetscFree(vals);CHKERRQ(ierr); 1040 ierr = PetscFree(extrows);CHKERRQ(ierr); 1041 ierr = PetscFree(extcols);CHKERRQ(ierr); 1042 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1043 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1044 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1045 1046 /* Complete assembling */ 1047 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1048 for (i=0;i<nquads;i++) { 1049 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1050 } 1051 for (i=0;i<nquads;i++) { 1052 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1053 } 1054 ierr = PetscFree(quads);CHKERRQ(ierr); 1055 1056 /* set change of basis */ 1057 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1058 ierr = MatDestroy(&T);CHKERRQ(ierr); 1059 1060 /* set quadratures */ 1061 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1062 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1063 1064 PetscFunctionReturn(0); 1065 } 1066 1067 /* the near-null space of BDDC carries information on quadrature weights, 1068 and these can be collinear -> so cheat with MatNullSpaceCreate 1069 and create a suitable set of basis vectors first */ 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1072 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1073 { 1074 PetscErrorCode ierr; 1075 PetscInt i; 1076 1077 PetscFunctionBegin; 1078 for (i=0;i<nvecs;i++) { 1079 PetscInt first,last; 1080 1081 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1082 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1083 if (i>=first && i < last) { 1084 PetscScalar *data; 1085 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1086 if (!has_const) { 1087 data[i-first] = 1.; 1088 } else { 1089 data[2*i-first] = 1./PetscSqrtReal(2.); 1090 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1091 } 1092 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1093 } 1094 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1095 } 1096 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1097 for (i=0;i<nvecs;i++) { /* reset vectors */ 1098 PetscInt first,last; 1099 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1100 if (i>=first && i < last) { 1101 PetscScalar *data; 1102 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1103 if (!has_const) { 1104 data[i-first] = 0.; 1105 } else { 1106 data[2*i-first] = 0.; 1107 data[2*i-first+1] = 0.; 1108 } 1109 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1110 } 1111 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1112 } 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1118 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1119 { 1120 Mat loc_divudotp; 1121 Vec p,v,vins,quad_vec,*quad_vecs; 1122 ISLocalToGlobalMapping map; 1123 IS *faces,*edges; 1124 PetscScalar *vals; 1125 const PetscScalar *array; 1126 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1127 PetscMPIInt rank; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1132 if (graph->twodim) { 1133 lmaxneighs = 2; 1134 } else { 1135 lmaxneighs = 1; 1136 for (i=0;i<ne;i++) { 1137 const PetscInt *idxs; 1138 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1139 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1140 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1141 } 1142 lmaxneighs++; /* graph count does not include self */ 1143 } 1144 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1145 maxsize = 0; 1146 for (i=0;i<ne;i++) { 1147 PetscInt nn; 1148 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1149 maxsize = PetscMax(maxsize,nn); 1150 } 1151 for (i=0;i<nf;i++) { 1152 PetscInt nn; 1153 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1154 maxsize = PetscMax(maxsize,nn); 1155 } 1156 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1157 /* create vectors to hold quadrature weights */ 1158 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1159 if (!transpose) { 1160 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1161 } else { 1162 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1163 } 1164 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1165 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1166 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1167 for (i=0;i<maxneighs;i++) { 1168 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1169 } 1170 1171 /* compute local quad vec */ 1172 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1173 if (!transpose) { 1174 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1175 } else { 1176 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1177 } 1178 ierr = VecSet(p,1.);CHKERRQ(ierr); 1179 if (!transpose) { 1180 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1181 } else { 1182 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1183 } 1184 if (vl2l) { 1185 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1186 } else { 1187 vins = v; 1188 } 1189 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1190 ierr = VecDestroy(&p);CHKERRQ(ierr); 1191 1192 /* insert in global quadrature vecs */ 1193 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1194 for (i=0;i<nf;i++) { 1195 const PetscInt *idxs; 1196 PetscInt idx,nn,j; 1197 1198 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1199 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1200 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1201 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1202 idx = -(idx+1); 1203 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1204 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1205 } 1206 for (i=0;i<ne;i++) { 1207 const PetscInt *idxs; 1208 PetscInt idx,nn,j; 1209 1210 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1211 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1212 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1213 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1214 idx = -(idx+1); 1215 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1216 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1217 } 1218 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1219 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1220 if (vl2l) { 1221 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1222 } 1223 ierr = VecDestroy(&v);CHKERRQ(ierr); 1224 ierr = PetscFree(vals);CHKERRQ(ierr); 1225 1226 /* assemble near null space */ 1227 for (i=0;i<maxneighs;i++) { 1228 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1229 } 1230 for (i=0;i<maxneighs;i++) { 1231 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1232 } 1233 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1240 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1241 { 1242 PetscErrorCode ierr; 1243 Vec local,global; 1244 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1245 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1246 1247 PetscFunctionBegin; 1248 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1249 /* need to convert from global to local topology information and remove references to information in global ordering */ 1250 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1251 if (pcbddc->user_provided_isfordofs) { 1252 if (pcbddc->n_ISForDofs) { 1253 PetscInt i; 1254 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1255 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1256 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1257 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1258 } 1259 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1260 pcbddc->n_ISForDofs = 0; 1261 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1262 } 1263 } else { 1264 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1265 PetscInt i, n = matis->A->rmap->n; 1266 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1267 if (i > 1) { 1268 pcbddc->n_ISForDofsLocal = i; 1269 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1270 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1271 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1272 } 1273 } 1274 } 1275 } 1276 1277 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1278 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1279 } 1280 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1281 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1282 } 1283 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1284 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1285 } 1286 ierr = VecDestroy(&global);CHKERRQ(ierr); 1287 ierr = VecDestroy(&local);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1293 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1294 { 1295 PC_IS *pcis = (PC_IS*)(pc->data); 1296 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1297 PetscErrorCode ierr; 1298 1299 PetscFunctionBegin; 1300 if (!pcbddc->benign_have_null) { 1301 PetscFunctionReturn(0); 1302 } 1303 if (pcbddc->ChangeOfBasisMatrix) { 1304 Vec swap; 1305 1306 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1307 swap = pcbddc->work_change; 1308 pcbddc->work_change = r; 1309 r = swap; 1310 } 1311 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1312 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1313 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1314 ierr = VecSet(z,0.);CHKERRQ(ierr); 1315 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1316 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1317 if (pcbddc->ChangeOfBasisMatrix) { 1318 Vec swap; 1319 1320 swap = r; 1321 r = pcbddc->work_change; 1322 pcbddc->work_change = swap; 1323 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1324 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1325 } 1326 PetscFunctionReturn(0); 1327 } 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1331 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1332 { 1333 PCBDDCBenignMatMult_ctx ctx; 1334 PetscErrorCode ierr; 1335 PetscBool apply_right,apply_left,reset_x; 1336 1337 PetscFunctionBegin; 1338 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1339 if (transpose) { 1340 apply_right = ctx->apply_left; 1341 apply_left = ctx->apply_right; 1342 } else { 1343 apply_right = ctx->apply_right; 1344 apply_left = ctx->apply_left; 1345 } 1346 reset_x = PETSC_FALSE; 1347 if (apply_right) { 1348 const PetscScalar *ax; 1349 PetscInt nl,i; 1350 1351 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1352 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1353 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1354 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1355 for (i=0;i<ctx->benign_n;i++) { 1356 PetscScalar sum,val; 1357 const PetscInt *idxs; 1358 PetscInt nz,j; 1359 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1360 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1361 sum = 0.; 1362 if (ctx->apply_p0) { 1363 val = ctx->work[idxs[nz-1]]; 1364 for (j=0;j<nz-1;j++) { 1365 sum += ctx->work[idxs[j]]; 1366 ctx->work[idxs[j]] += val; 1367 } 1368 } else { 1369 for (j=0;j<nz-1;j++) { 1370 sum += ctx->work[idxs[j]]; 1371 } 1372 } 1373 ctx->work[idxs[nz-1]] -= sum; 1374 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1375 } 1376 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1377 reset_x = PETSC_TRUE; 1378 } 1379 if (transpose) { 1380 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1381 } else { 1382 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1383 } 1384 if (reset_x) { 1385 ierr = VecResetArray(x);CHKERRQ(ierr); 1386 } 1387 if (apply_left) { 1388 PetscScalar *ay; 1389 PetscInt i; 1390 1391 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1392 for (i=0;i<ctx->benign_n;i++) { 1393 PetscScalar sum,val; 1394 const PetscInt *idxs; 1395 PetscInt nz,j; 1396 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1397 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1398 val = -ay[idxs[nz-1]]; 1399 if (ctx->apply_p0) { 1400 sum = 0.; 1401 for (j=0;j<nz-1;j++) { 1402 sum += ay[idxs[j]]; 1403 ay[idxs[j]] += val; 1404 } 1405 ay[idxs[nz-1]] += sum; 1406 } else { 1407 for (j=0;j<nz-1;j++) { 1408 ay[idxs[j]] += val; 1409 } 1410 ay[idxs[nz-1]] = 0.; 1411 } 1412 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1413 } 1414 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1415 } 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1421 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1422 { 1423 PetscErrorCode ierr; 1424 1425 PetscFunctionBegin; 1426 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1432 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1433 { 1434 PetscErrorCode ierr; 1435 1436 PetscFunctionBegin; 1437 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "PCBDDCBenignShellMat" 1443 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1444 { 1445 PC_IS *pcis = (PC_IS*)pc->data; 1446 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1447 PCBDDCBenignMatMult_ctx ctx; 1448 PetscErrorCode ierr; 1449 1450 PetscFunctionBegin; 1451 if (!restore) { 1452 Mat A_IB,A_BI; 1453 PetscScalar *work; 1454 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1455 1456 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1457 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1458 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1459 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1460 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1461 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1462 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1463 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1464 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1465 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1466 ctx->apply_left = PETSC_TRUE; 1467 ctx->apply_right = PETSC_FALSE; 1468 ctx->apply_p0 = PETSC_FALSE; 1469 ctx->benign_n = pcbddc->benign_n; 1470 if (reuse) { 1471 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1472 ctx->free = PETSC_FALSE; 1473 } else { /* TODO: could be optimized for successive solves */ 1474 ISLocalToGlobalMapping N_to_D; 1475 PetscInt i; 1476 1477 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1478 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1479 for (i=0;i<pcbddc->benign_n;i++) { 1480 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1481 } 1482 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1483 ctx->free = PETSC_TRUE; 1484 } 1485 ctx->A = pcis->A_IB; 1486 ctx->work = work; 1487 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1488 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1489 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1490 pcis->A_IB = A_IB; 1491 1492 /* A_BI as A_IB^T */ 1493 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1494 pcbddc->benign_original_mat = pcis->A_BI; 1495 pcis->A_BI = A_BI; 1496 } else { 1497 if (!pcbddc->benign_original_mat) { 1498 PetscFunctionReturn(0); 1499 } 1500 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1501 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1502 pcis->A_IB = ctx->A; 1503 ctx->A = NULL; 1504 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1505 pcis->A_BI = pcbddc->benign_original_mat; 1506 pcbddc->benign_original_mat = NULL; 1507 if (ctx->free) { 1508 PetscInt i; 1509 for (i=0;i<ctx->benign_n;i++) { 1510 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1511 } 1512 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1513 } 1514 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1515 ierr = PetscFree(ctx);CHKERRQ(ierr); 1516 } 1517 PetscFunctionReturn(0); 1518 } 1519 1520 /* used just in bddc debug mode */ 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "PCBDDCBenignProject" 1523 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1524 { 1525 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1526 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1527 Mat An; 1528 PetscErrorCode ierr; 1529 1530 PetscFunctionBegin; 1531 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1532 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1533 if (is1) { 1534 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1535 ierr = MatDestroy(&An);CHKERRQ(ierr); 1536 } else { 1537 *B = An; 1538 } 1539 PetscFunctionReturn(0); 1540 } 1541 1542 /* TODO: add reuse flag */ 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatSeqAIJCompress" 1545 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1546 { 1547 Mat Bt; 1548 PetscScalar *a,*bdata; 1549 const PetscInt *ii,*ij; 1550 PetscInt m,n,i,nnz,*bii,*bij; 1551 PetscBool flg_row; 1552 PetscErrorCode ierr; 1553 1554 PetscFunctionBegin; 1555 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1556 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1557 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1558 nnz = n; 1559 for (i=0;i<ii[n];i++) { 1560 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1561 } 1562 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1563 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1564 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1565 nnz = 0; 1566 bii[0] = 0; 1567 for (i=0;i<n;i++) { 1568 PetscInt j; 1569 for (j=ii[i];j<ii[i+1];j++) { 1570 PetscScalar entry = a[j]; 1571 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1572 bij[nnz] = ij[j]; 1573 bdata[nnz] = entry; 1574 nnz++; 1575 } 1576 } 1577 bii[i+1] = nnz; 1578 } 1579 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1580 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1581 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1582 { 1583 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1584 b->free_a = PETSC_TRUE; 1585 b->free_ij = PETSC_TRUE; 1586 } 1587 *B = Bt; 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatDetectDisconnectedComponents" 1593 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1594 { 1595 Mat B; 1596 IS is_dummy,*cc_n; 1597 ISLocalToGlobalMapping l2gmap_dummy; 1598 PCBDDCGraph graph; 1599 PetscInt i,n; 1600 PetscInt *xadj,*adjncy; 1601 PetscInt *xadj_filtered,*adjncy_filtered; 1602 PetscBool flg_row,isseqaij; 1603 PetscErrorCode ierr; 1604 1605 PetscFunctionBegin; 1606 if (!A->rmap->N || !A->cmap->N) { 1607 *ncc = 0; 1608 *cc = NULL; 1609 PetscFunctionReturn(0); 1610 } 1611 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1612 if (!isseqaij && filter) { 1613 PetscBool isseqdense; 1614 1615 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1616 if (!isseqdense) { 1617 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1618 } else { /* TODO: rectangular case and LDA */ 1619 PetscScalar *array; 1620 PetscReal chop=1.e-6; 1621 1622 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1623 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1624 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1625 for (i=0;i<n;i++) { 1626 PetscInt j; 1627 for (j=i+1;j<n;j++) { 1628 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1629 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1630 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1631 } 1632 } 1633 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1634 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1635 } 1636 } else { 1637 B = A; 1638 } 1639 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1640 1641 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1642 if (filter) { 1643 PetscScalar *data; 1644 PetscInt j,cum; 1645 1646 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1647 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1648 cum = 0; 1649 for (i=0;i<n;i++) { 1650 PetscInt t; 1651 1652 for (j=xadj[i];j<xadj[i+1];j++) { 1653 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1654 continue; 1655 } 1656 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1657 } 1658 t = xadj_filtered[i]; 1659 xadj_filtered[i] = cum; 1660 cum += t; 1661 } 1662 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1663 } else { 1664 xadj_filtered = NULL; 1665 adjncy_filtered = NULL; 1666 } 1667 1668 /* compute local connected components using PCBDDCGraph */ 1669 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1670 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1671 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1672 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1673 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1674 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1675 if (xadj_filtered) { 1676 graph->xadj = xadj_filtered; 1677 graph->adjncy = adjncy_filtered; 1678 } else { 1679 graph->xadj = xadj; 1680 graph->adjncy = adjncy; 1681 } 1682 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1683 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1684 /* partial clean up */ 1685 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1686 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1687 if (A != B) { 1688 ierr = MatDestroy(&B);CHKERRQ(ierr); 1689 } 1690 1691 /* get back data */ 1692 if (ncc) *ncc = graph->ncc; 1693 if (cc) { 1694 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1695 for (i=0;i<graph->ncc;i++) { 1696 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1697 } 1698 *cc = cc_n; 1699 } 1700 /* clean up graph */ 1701 graph->xadj = 0; 1702 graph->adjncy = 0; 1703 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "PCBDDCBenignCheck" 1709 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1710 { 1711 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1712 PC_IS* pcis = (PC_IS*)(pc->data); 1713 IS dirIS = NULL; 1714 PetscInt i; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1719 if (zerodiag) { 1720 Mat A; 1721 Vec vec3_N; 1722 PetscScalar *vals; 1723 const PetscInt *idxs; 1724 PetscInt nz,*count; 1725 1726 /* p0 */ 1727 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1728 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1729 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1730 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1731 for (i=0;i<nz;i++) vals[i] = 1.; 1732 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1733 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1734 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1735 /* v_I */ 1736 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1737 for (i=0;i<nz;i++) vals[i] = 0.; 1738 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1740 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1741 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1742 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1743 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1744 if (dirIS) { 1745 PetscInt n; 1746 1747 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1748 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1749 for (i=0;i<n;i++) vals[i] = 0.; 1750 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1751 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1752 } 1753 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1754 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1755 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1756 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1757 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1758 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1759 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1760 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1761 ierr = PetscFree(vals);CHKERRQ(ierr); 1762 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1763 1764 /* there should not be any pressure dofs lying on the interface */ 1765 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1766 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1767 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1768 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1769 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1770 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1771 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1772 ierr = PetscFree(count);CHKERRQ(ierr); 1773 } 1774 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1775 1776 /* check PCBDDCBenignGetOrSetP0 */ 1777 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1778 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1779 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1780 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1781 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1782 for (i=0;i<pcbddc->benign_n;i++) { 1783 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1784 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1785 } 1786 PetscFunctionReturn(0); 1787 } 1788 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1791 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1792 { 1793 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1794 IS pressures,zerodiag,*zerodiag_subs; 1795 PetscInt nz,n; 1796 PetscInt *interior_dofs,n_interior_dofs; 1797 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1798 PetscErrorCode ierr; 1799 1800 PetscFunctionBegin; 1801 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1802 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1803 for (n=0;n<pcbddc->benign_n;n++) { 1804 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1805 } 1806 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1807 pcbddc->benign_n = 0; 1808 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1809 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1810 Checks if all the pressure dofs in each subdomain have a zero diagonal 1811 If not, a change of basis on pressures is not needed 1812 since the local Schur complements are already SPD 1813 */ 1814 has_null_pressures = PETSC_TRUE; 1815 have_null = PETSC_TRUE; 1816 if (pcbddc->n_ISForDofsLocal) { 1817 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1818 1819 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1820 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1821 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1822 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1823 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1824 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1825 if (!sorted) { 1826 ierr = ISSort(pressures);CHKERRQ(ierr); 1827 } 1828 } else { 1829 pressures = NULL; 1830 } 1831 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1832 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1833 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1834 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1835 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1836 if (!sorted) { 1837 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1838 } 1839 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1840 if (!nz) { 1841 if (n) have_null = PETSC_FALSE; 1842 has_null_pressures = PETSC_FALSE; 1843 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1844 } 1845 recompute_zerodiag = PETSC_FALSE; 1846 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1847 zerodiag_subs = NULL; 1848 pcbddc->benign_n = 0; 1849 n_interior_dofs = 0; 1850 interior_dofs = NULL; 1851 if (pcbddc->current_level) { /* need to compute interior nodes */ 1852 PetscInt n,i,j; 1853 PetscInt n_neigh,*neigh,*n_shared,**shared; 1854 PetscInt *iwork; 1855 1856 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1857 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1858 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1859 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1860 for (i=1;i<n_neigh;i++) 1861 for (j=0;j<n_shared[i];j++) 1862 iwork[shared[i][j]] += 1; 1863 for (i=0;i<n;i++) 1864 if (!iwork[i]) 1865 interior_dofs[n_interior_dofs++] = i; 1866 ierr = PetscFree(iwork);CHKERRQ(ierr); 1867 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1868 } 1869 if (has_null_pressures) { 1870 IS *subs; 1871 PetscInt nsubs,i,j,nl; 1872 const PetscInt *idxs; 1873 PetscScalar *array; 1874 Vec *work; 1875 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1876 1877 subs = pcbddc->local_subs; 1878 nsubs = pcbddc->n_local_subs; 1879 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1880 if (pcbddc->current_level) { 1881 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1882 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1883 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1884 /* work[0] = 1_p */ 1885 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1886 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1887 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1888 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1889 /* work[0] = 1_v */ 1890 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1891 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1892 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1893 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1894 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1895 } 1896 if (nsubs > 1) { 1897 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1898 for (i=0;i<nsubs;i++) { 1899 ISLocalToGlobalMapping l2g; 1900 IS t_zerodiag_subs; 1901 PetscInt nl; 1902 1903 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1904 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1905 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1906 if (nl) { 1907 PetscBool valid = PETSC_TRUE; 1908 1909 if (pcbddc->current_level) { 1910 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1911 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1912 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1913 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1914 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1915 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1916 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1917 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1918 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1919 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1920 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1921 for (j=0;j<n_interior_dofs;j++) { 1922 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1923 valid = PETSC_FALSE; 1924 break; 1925 } 1926 } 1927 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1928 } 1929 if (valid && pcbddc->NeumannBoundariesLocal) { 1930 IS t_bc; 1931 PetscInt nzb; 1932 1933 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1934 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1935 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1936 if (nzb) valid = PETSC_FALSE; 1937 } 1938 if (valid && pressures) { 1939 IS t_pressure_subs; 1940 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1941 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1942 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1943 } 1944 if (valid) { 1945 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1946 pcbddc->benign_n++; 1947 } else { 1948 recompute_zerodiag = PETSC_TRUE; 1949 } 1950 } 1951 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1952 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1953 } 1954 } else { /* there's just one subdomain (or zero if they have not been detected */ 1955 PetscBool valid = PETSC_TRUE; 1956 1957 if (pcbddc->NeumannBoundariesLocal) { 1958 PetscInt nzb; 1959 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1960 if (nzb) valid = PETSC_FALSE; 1961 } 1962 if (valid && pressures) { 1963 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1964 } 1965 if (valid && pcbddc->current_level) { 1966 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1967 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1968 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1969 for (j=0;j<n_interior_dofs;j++) { 1970 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1971 valid = PETSC_FALSE; 1972 break; 1973 } 1974 } 1975 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1976 } 1977 if (valid) { 1978 pcbddc->benign_n = 1; 1979 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1980 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1981 zerodiag_subs[0] = zerodiag; 1982 } 1983 } 1984 if (pcbddc->current_level) { 1985 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1986 } 1987 } 1988 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1989 1990 if (!pcbddc->benign_n) { 1991 PetscInt n; 1992 1993 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1994 recompute_zerodiag = PETSC_FALSE; 1995 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1996 if (n) { 1997 has_null_pressures = PETSC_FALSE; 1998 have_null = PETSC_FALSE; 1999 } 2000 } 2001 2002 /* final check for null pressures */ 2003 if (zerodiag && pressures) { 2004 PetscInt nz,np; 2005 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2006 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2007 if (nz != np) have_null = PETSC_FALSE; 2008 } 2009 2010 if (recompute_zerodiag) { 2011 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2012 if (pcbddc->benign_n == 1) { 2013 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2014 zerodiag = zerodiag_subs[0]; 2015 } else { 2016 PetscInt i,nzn,*new_idxs; 2017 2018 nzn = 0; 2019 for (i=0;i<pcbddc->benign_n;i++) { 2020 PetscInt ns; 2021 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2022 nzn += ns; 2023 } 2024 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2025 nzn = 0; 2026 for (i=0;i<pcbddc->benign_n;i++) { 2027 PetscInt ns,*idxs; 2028 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2029 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2030 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2031 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2032 nzn += ns; 2033 } 2034 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2035 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2036 } 2037 have_null = PETSC_FALSE; 2038 } 2039 2040 /* Prepare matrix to compute no-net-flux */ 2041 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2042 Mat A,loc_divudotp; 2043 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2044 IS row,col,isused = NULL; 2045 PetscInt M,N,n,st,n_isused; 2046 2047 if (pressures) { 2048 isused = pressures; 2049 } else { 2050 isused = zerodiag; 2051 } 2052 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2053 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2054 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2055 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2056 n_isused = 0; 2057 if (isused) { 2058 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2059 } 2060 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2061 st = st-n_isused; 2062 if (n) { 2063 const PetscInt *gidxs; 2064 2065 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2066 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2067 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2068 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2069 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2070 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2071 } else { 2072 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2073 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2074 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2075 } 2076 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2077 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2078 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2079 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2080 ierr = ISDestroy(&row);CHKERRQ(ierr); 2081 ierr = ISDestroy(&col);CHKERRQ(ierr); 2082 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2083 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2084 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2085 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2086 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2087 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2088 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2089 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2090 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2091 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 } 2093 2094 /* change of basis and p0 dofs */ 2095 if (has_null_pressures) { 2096 IS zerodiagc; 2097 const PetscInt *idxs,*idxsc; 2098 PetscInt i,s,*nnz; 2099 2100 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2101 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2102 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2103 /* local change of basis for pressures */ 2104 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2105 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2106 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2107 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2108 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2109 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2110 for (i=0;i<pcbddc->benign_n;i++) { 2111 PetscInt nzs,j; 2112 2113 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2114 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2115 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2116 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2117 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2118 } 2119 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2120 ierr = PetscFree(nnz);CHKERRQ(ierr); 2121 /* set identity on velocities */ 2122 for (i=0;i<n-nz;i++) { 2123 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2124 } 2125 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2126 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2127 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2128 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2129 /* set change on pressures */ 2130 for (s=0;s<pcbddc->benign_n;s++) { 2131 PetscScalar *array; 2132 PetscInt nzs; 2133 2134 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2135 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2136 for (i=0;i<nzs-1;i++) { 2137 PetscScalar vals[2]; 2138 PetscInt cols[2]; 2139 2140 cols[0] = idxs[i]; 2141 cols[1] = idxs[nzs-1]; 2142 vals[0] = 1.; 2143 vals[1] = 1.; 2144 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2145 } 2146 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2147 for (i=0;i<nzs-1;i++) array[i] = -1.; 2148 array[nzs-1] = 1.; 2149 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2150 /* store local idxs for p0 */ 2151 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2152 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2153 ierr = PetscFree(array);CHKERRQ(ierr); 2154 } 2155 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 /* project if needed */ 2158 if (pcbddc->benign_change_explicit) { 2159 Mat M; 2160 2161 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2162 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2163 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2164 ierr = MatDestroy(&M);CHKERRQ(ierr); 2165 } 2166 /* store global idxs for p0 */ 2167 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2168 } 2169 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2170 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2171 2172 /* determines if the coarse solver will be singular or not */ 2173 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2174 /* determines if the problem has subdomains with 0 pressure block */ 2175 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2176 *zerodiaglocal = zerodiag; 2177 PetscFunctionReturn(0); 2178 } 2179 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2182 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2183 { 2184 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2185 PetscScalar *array; 2186 PetscErrorCode ierr; 2187 2188 PetscFunctionBegin; 2189 if (!pcbddc->benign_sf) { 2190 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2191 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2192 } 2193 if (get) { 2194 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2195 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2196 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2197 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2198 } else { 2199 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2200 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2201 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2202 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2203 } 2204 PetscFunctionReturn(0); 2205 } 2206 2207 #undef __FUNCT__ 2208 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2209 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2210 { 2211 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2212 PetscErrorCode ierr; 2213 2214 PetscFunctionBegin; 2215 /* TODO: add error checking 2216 - avoid nested pop (or push) calls. 2217 - cannot push before pop. 2218 - cannot call this if pcbddc->local_mat is NULL 2219 */ 2220 if (!pcbddc->benign_n) { 2221 PetscFunctionReturn(0); 2222 } 2223 if (pop) { 2224 if (pcbddc->benign_change_explicit) { 2225 IS is_p0; 2226 MatReuse reuse; 2227 2228 /* extract B_0 */ 2229 reuse = MAT_INITIAL_MATRIX; 2230 if (pcbddc->benign_B0) { 2231 reuse = MAT_REUSE_MATRIX; 2232 } 2233 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2234 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2235 /* remove rows and cols from local problem */ 2236 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2237 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2238 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2239 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2240 } else { 2241 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2242 PetscScalar *vals; 2243 PetscInt i,n,*idxs_ins; 2244 2245 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2246 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2247 if (!pcbddc->benign_B0) { 2248 PetscInt *nnz; 2249 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2250 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2251 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2252 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2253 for (i=0;i<pcbddc->benign_n;i++) { 2254 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2255 nnz[i] = n - nnz[i]; 2256 } 2257 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz);CHKERRQ(ierr); 2259 } 2260 2261 for (i=0;i<pcbddc->benign_n;i++) { 2262 PetscScalar *array; 2263 PetscInt *idxs,j,nz,cum; 2264 2265 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2266 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2267 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2268 for (j=0;j<nz;j++) vals[j] = 1.; 2269 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2270 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2271 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2272 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2273 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2274 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2275 cum = 0; 2276 for (j=0;j<n;j++) { 2277 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2278 vals[cum] = array[j]; 2279 idxs_ins[cum] = j; 2280 cum++; 2281 } 2282 } 2283 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2284 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2285 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2286 } 2287 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2288 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2289 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2290 } 2291 } else { /* push */ 2292 if (pcbddc->benign_change_explicit) { 2293 PetscInt i; 2294 2295 for (i=0;i<pcbddc->benign_n;i++) { 2296 PetscScalar *B0_vals; 2297 PetscInt *B0_cols,B0_ncol; 2298 2299 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2300 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2301 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2302 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2303 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2304 } 2305 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2307 } else { 2308 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2309 } 2310 } 2311 PetscFunctionReturn(0); 2312 } 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2316 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2317 { 2318 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2319 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2320 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2321 PetscBLASInt *B_iwork,*B_ifail; 2322 PetscScalar *work,lwork; 2323 PetscScalar *St,*S,*eigv; 2324 PetscScalar *Sarray,*Starray; 2325 PetscReal *eigs,thresh; 2326 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2327 PetscBool allocated_S_St; 2328 #if defined(PETSC_USE_COMPLEX) 2329 PetscReal *rwork; 2330 #endif 2331 PetscErrorCode ierr; 2332 2333 PetscFunctionBegin; 2334 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2335 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2336 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2337 2338 if (pcbddc->dbg_flag) { 2339 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2340 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2341 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2342 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2343 } 2344 2345 if (pcbddc->dbg_flag) { 2346 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2347 } 2348 2349 /* max size of subsets */ 2350 mss = 0; 2351 for (i=0;i<sub_schurs->n_subs;i++) { 2352 PetscInt subset_size; 2353 2354 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2355 mss = PetscMax(mss,subset_size); 2356 } 2357 2358 /* min/max and threshold */ 2359 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2360 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2361 nmax = PetscMax(nmin,nmax); 2362 allocated_S_St = PETSC_FALSE; 2363 if (nmin) { 2364 allocated_S_St = PETSC_TRUE; 2365 } 2366 2367 /* allocate lapack workspace */ 2368 cum = cum2 = 0; 2369 maxneigs = 0; 2370 for (i=0;i<sub_schurs->n_subs;i++) { 2371 PetscInt n,subset_size; 2372 2373 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2374 n = PetscMin(subset_size,nmax); 2375 cum += subset_size; 2376 cum2 += subset_size*n; 2377 maxneigs = PetscMax(maxneigs,n); 2378 } 2379 if (mss) { 2380 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2381 PetscBLASInt B_itype = 1; 2382 PetscBLASInt B_N = mss; 2383 PetscReal zero = 0.0; 2384 PetscReal eps = 0.0; /* dlamch? */ 2385 2386 B_lwork = -1; 2387 S = NULL; 2388 St = NULL; 2389 eigs = NULL; 2390 eigv = NULL; 2391 B_iwork = NULL; 2392 B_ifail = NULL; 2393 #if defined(PETSC_USE_COMPLEX) 2394 rwork = NULL; 2395 #endif 2396 thresh = 1.0; 2397 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2398 #if defined(PETSC_USE_COMPLEX) 2399 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2400 #else 2401 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2402 #endif 2403 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2404 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2405 } else { 2406 /* TODO */ 2407 } 2408 } else { 2409 lwork = 0; 2410 } 2411 2412 nv = 0; 2413 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2414 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2415 } 2416 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2417 if (allocated_S_St) { 2418 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2419 } 2420 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2421 #if defined(PETSC_USE_COMPLEX) 2422 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2423 #endif 2424 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2425 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2426 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2427 nv+cum,&pcbddc->adaptive_constraints_idxs, 2428 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2429 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2430 2431 maxneigs = 0; 2432 cum = cumarray = 0; 2433 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2434 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2435 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2436 const PetscInt *idxs; 2437 2438 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2439 for (cum=0;cum<nv;cum++) { 2440 pcbddc->adaptive_constraints_n[cum] = 1; 2441 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2442 pcbddc->adaptive_constraints_data[cum] = 1.0; 2443 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2444 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2445 } 2446 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2447 } 2448 2449 if (mss) { /* multilevel */ 2450 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2451 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2452 } 2453 2454 thresh = pcbddc->adaptive_threshold; 2455 for (i=0;i<sub_schurs->n_subs;i++) { 2456 const PetscInt *idxs; 2457 PetscReal upper,lower; 2458 PetscInt j,subset_size,eigs_start = 0; 2459 PetscBLASInt B_N; 2460 PetscBool same_data = PETSC_FALSE; 2461 2462 if (pcbddc->use_deluxe_scaling) { 2463 upper = PETSC_MAX_REAL; 2464 lower = thresh; 2465 } else { 2466 upper = 1./thresh; 2467 lower = 0.; 2468 } 2469 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2470 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2471 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2472 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2473 if (sub_schurs->is_hermitian) { 2474 PetscInt j,k; 2475 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2476 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2477 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2478 } 2479 for (j=0;j<subset_size;j++) { 2480 for (k=j;k<subset_size;k++) { 2481 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2482 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2483 } 2484 } 2485 } else { 2486 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2487 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2488 } 2489 } else { 2490 S = Sarray + cumarray; 2491 St = Starray + cumarray; 2492 } 2493 /* see if we can save some work */ 2494 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2495 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2496 } 2497 2498 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2499 B_neigs = 0; 2500 } else { 2501 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2502 PetscBLASInt B_itype = 1; 2503 PetscBLASInt B_IL, B_IU; 2504 PetscReal eps = -1.0; /* dlamch? */ 2505 PetscInt nmin_s; 2506 PetscBool compute_range = PETSC_FALSE; 2507 2508 if (pcbddc->dbg_flag) { 2509 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2510 } 2511 2512 compute_range = PETSC_FALSE; 2513 if (thresh > 1.+PETSC_SMALL && !same_data) { 2514 compute_range = PETSC_TRUE; 2515 } 2516 2517 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2518 if (compute_range) { 2519 2520 /* ask for eigenvalues larger than thresh */ 2521 #if defined(PETSC_USE_COMPLEX) 2522 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2523 #else 2524 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2525 #endif 2526 } else if (!same_data) { 2527 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2528 B_IL = 1; 2529 #if defined(PETSC_USE_COMPLEX) 2530 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2531 #else 2532 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2533 #endif 2534 } else { /* same_data is true, so get the adaptive function requested by the user */ 2535 PetscInt k; 2536 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2537 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2538 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2539 nmin = nmax; 2540 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2541 for (k=0;k<nmax;k++) { 2542 eigs[k] = 1./PETSC_SMALL; 2543 eigv[k*(subset_size+1)] = 1.0; 2544 } 2545 } 2546 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2547 if (B_ierr) { 2548 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2549 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2550 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2551 } 2552 2553 if (B_neigs > nmax) { 2554 if (pcbddc->dbg_flag) { 2555 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2556 } 2557 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2558 B_neigs = nmax; 2559 } 2560 2561 nmin_s = PetscMin(nmin,B_N); 2562 if (B_neigs < nmin_s) { 2563 PetscBLASInt B_neigs2; 2564 2565 if (pcbddc->use_deluxe_scaling) { 2566 B_IL = B_N - nmin_s + 1; 2567 B_IU = B_N - B_neigs; 2568 } else { 2569 B_IL = B_neigs + 1; 2570 B_IU = nmin_s; 2571 } 2572 if (pcbddc->dbg_flag) { 2573 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2574 } 2575 if (sub_schurs->is_hermitian) { 2576 PetscInt j,k; 2577 for (j=0;j<subset_size;j++) { 2578 for (k=j;k<subset_size;k++) { 2579 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2580 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2581 } 2582 } 2583 } else { 2584 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2585 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2586 } 2587 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2588 #if defined(PETSC_USE_COMPLEX) 2589 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2590 #else 2591 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2592 #endif 2593 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2594 B_neigs += B_neigs2; 2595 } 2596 if (B_ierr) { 2597 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2598 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2599 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2600 } 2601 if (pcbddc->dbg_flag) { 2602 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2603 for (j=0;j<B_neigs;j++) { 2604 if (eigs[j] == 0.0) { 2605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2606 } else { 2607 if (pcbddc->use_deluxe_scaling) { 2608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2609 } else { 2610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2611 } 2612 } 2613 } 2614 } 2615 } else { 2616 /* TODO */ 2617 } 2618 } 2619 /* change the basis back to the original one */ 2620 if (sub_schurs->change) { 2621 Mat change,phi,phit; 2622 2623 if (pcbddc->dbg_flag > 1) { 2624 PetscInt ii; 2625 for (ii=0;ii<B_neigs;ii++) { 2626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2627 for (j=0;j<B_N;j++) { 2628 #if defined(PETSC_USE_COMPLEX) 2629 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2630 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2632 #else 2633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2634 #endif 2635 } 2636 } 2637 } 2638 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2639 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2640 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2641 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2642 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2643 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2644 } 2645 maxneigs = PetscMax(B_neigs,maxneigs); 2646 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2647 if (B_neigs) { 2648 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2649 2650 if (pcbddc->dbg_flag > 1) { 2651 PetscInt ii; 2652 for (ii=0;ii<B_neigs;ii++) { 2653 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2654 for (j=0;j<B_N;j++) { 2655 #if defined(PETSC_USE_COMPLEX) 2656 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2657 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2658 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2659 #else 2660 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2661 #endif 2662 } 2663 } 2664 } 2665 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2666 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2667 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2668 cum++; 2669 } 2670 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2671 /* shift for next computation */ 2672 cumarray += subset_size*subset_size; 2673 } 2674 if (pcbddc->dbg_flag) { 2675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2676 } 2677 2678 if (mss) { 2679 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2680 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2681 /* destroy matrices (junk) */ 2682 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2683 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2684 } 2685 if (allocated_S_St) { 2686 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2687 } 2688 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2689 #if defined(PETSC_USE_COMPLEX) 2690 ierr = PetscFree(rwork);CHKERRQ(ierr); 2691 #endif 2692 if (pcbddc->dbg_flag) { 2693 PetscInt maxneigs_r; 2694 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2695 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2696 } 2697 PetscFunctionReturn(0); 2698 } 2699 2700 #undef __FUNCT__ 2701 #define __FUNCT__ "PCBDDCSetUpSolvers" 2702 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2703 { 2704 PetscScalar *coarse_submat_vals; 2705 PetscErrorCode ierr; 2706 2707 PetscFunctionBegin; 2708 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2709 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2710 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2711 2712 /* Setup local neumann solver ksp_R */ 2713 /* PCBDDCSetUpLocalScatters should be called first! */ 2714 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2715 2716 /* 2717 Setup local correction and local part of coarse basis. 2718 Gives back the dense local part of the coarse matrix in column major ordering 2719 */ 2720 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2721 2722 /* Compute total number of coarse nodes and setup coarse solver */ 2723 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2724 2725 /* free */ 2726 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2727 PetscFunctionReturn(0); 2728 } 2729 2730 #undef __FUNCT__ 2731 #define __FUNCT__ "PCBDDCResetCustomization" 2732 PetscErrorCode PCBDDCResetCustomization(PC pc) 2733 { 2734 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2735 PetscErrorCode ierr; 2736 2737 PetscFunctionBegin; 2738 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2739 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2740 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2741 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2742 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2743 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2744 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2745 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2746 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2747 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "PCBDDCResetTopography" 2753 PetscErrorCode PCBDDCResetTopography(PC pc) 2754 { 2755 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2756 PetscInt i; 2757 PetscErrorCode ierr; 2758 2759 PetscFunctionBegin; 2760 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2761 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2762 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2763 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2764 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2765 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2766 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2767 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2768 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2769 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2770 for (i=0;i<pcbddc->n_local_subs;i++) { 2771 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2772 } 2773 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2774 if (pcbddc->sub_schurs) { 2775 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2776 } 2777 pcbddc->graphanalyzed = PETSC_FALSE; 2778 PetscFunctionReturn(0); 2779 } 2780 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "PCBDDCResetSolvers" 2783 PetscErrorCode PCBDDCResetSolvers(PC pc) 2784 { 2785 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2786 PetscErrorCode ierr; 2787 2788 PetscFunctionBegin; 2789 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2790 if (pcbddc->coarse_phi_B) { 2791 PetscScalar *array; 2792 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2793 ierr = PetscFree(array);CHKERRQ(ierr); 2794 } 2795 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2796 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2797 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2798 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2799 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2800 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2801 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2802 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2803 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2804 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2805 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2806 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2807 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2808 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2809 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2810 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2811 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2812 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2813 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2814 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2815 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2816 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2817 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2818 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2819 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2820 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2821 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2822 if (pcbddc->benign_zerodiag_subs) { 2823 PetscInt i; 2824 for (i=0;i<pcbddc->benign_n;i++) { 2825 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2826 } 2827 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2828 } 2829 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2830 PetscFunctionReturn(0); 2831 } 2832 2833 #undef __FUNCT__ 2834 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2835 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2836 { 2837 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2838 PC_IS *pcis = (PC_IS*)pc->data; 2839 VecType impVecType; 2840 PetscInt n_constraints,n_R,old_size; 2841 PetscErrorCode ierr; 2842 2843 PetscFunctionBegin; 2844 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2845 /* get sizes */ 2846 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2847 n_R = pcis->n - pcbddc->n_vertices; 2848 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2849 /* local work vectors (try to avoid unneeded work)*/ 2850 /* R nodes */ 2851 old_size = -1; 2852 if (pcbddc->vec1_R) { 2853 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2854 } 2855 if (n_R != old_size) { 2856 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2857 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2858 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2859 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2860 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2861 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2862 } 2863 /* local primal dofs */ 2864 old_size = -1; 2865 if (pcbddc->vec1_P) { 2866 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2867 } 2868 if (pcbddc->local_primal_size != old_size) { 2869 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2870 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2871 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2872 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2873 } 2874 /* local explicit constraints */ 2875 old_size = -1; 2876 if (pcbddc->vec1_C) { 2877 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2878 } 2879 if (n_constraints && n_constraints != old_size) { 2880 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2881 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2882 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2883 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2884 } 2885 PetscFunctionReturn(0); 2886 } 2887 2888 #undef __FUNCT__ 2889 #define __FUNCT__ "PCBDDCSetUpCorrection" 2890 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2891 { 2892 PetscErrorCode ierr; 2893 /* pointers to pcis and pcbddc */ 2894 PC_IS* pcis = (PC_IS*)pc->data; 2895 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2896 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2897 /* submatrices of local problem */ 2898 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2899 /* submatrices of local coarse problem */ 2900 Mat S_VV,S_CV,S_VC,S_CC; 2901 /* working matrices */ 2902 Mat C_CR; 2903 /* additional working stuff */ 2904 PC pc_R; 2905 Mat F; 2906 Vec dummy_vec; 2907 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2908 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2909 PetscScalar *work; 2910 PetscInt *idx_V_B; 2911 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2912 PetscInt i,n_R,n_D,n_B; 2913 2914 /* some shortcuts to scalars */ 2915 PetscScalar one=1.0,m_one=-1.0; 2916 2917 PetscFunctionBegin; 2918 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2919 2920 /* Set Non-overlapping dimensions */ 2921 n_vertices = pcbddc->n_vertices; 2922 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2923 n_B = pcis->n_B; 2924 n_D = pcis->n - n_B; 2925 n_R = pcis->n - n_vertices; 2926 2927 /* vertices in boundary numbering */ 2928 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2929 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2930 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2931 2932 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2933 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2935 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2936 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2937 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2938 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2939 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2940 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2941 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2942 2943 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2944 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2945 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2946 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2947 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2948 lda_rhs = n_R; 2949 need_benign_correction = PETSC_FALSE; 2950 if (isLU || isILU || isCHOL) { 2951 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2952 } else if (sub_schurs && sub_schurs->reuse_solver) { 2953 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2954 MatFactorType type; 2955 2956 F = reuse_solver->F; 2957 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2958 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2959 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2960 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2961 } else { 2962 F = NULL; 2963 } 2964 2965 /* allocate workspace */ 2966 n = 0; 2967 if (n_constraints) { 2968 n += lda_rhs*n_constraints; 2969 } 2970 if (n_vertices) { 2971 n = PetscMax(2*lda_rhs*n_vertices,n); 2972 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2973 } 2974 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2975 2976 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2977 dummy_vec = NULL; 2978 if (need_benign_correction && lda_rhs != n_R && F) { 2979 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2980 } 2981 2982 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2983 if (n_constraints) { 2984 Mat M1,M2,M3,C_B; 2985 IS is_aux; 2986 PetscScalar *array,*array2; 2987 2988 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2989 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2990 2991 /* Extract constraints on R nodes: C_{CR} */ 2992 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2993 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2994 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2995 2996 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2997 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2998 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2999 for (i=0;i<n_constraints;i++) { 3000 const PetscScalar *row_cmat_values; 3001 const PetscInt *row_cmat_indices; 3002 PetscInt size_of_constraint,j; 3003 3004 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3005 for (j=0;j<size_of_constraint;j++) { 3006 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3007 } 3008 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3009 } 3010 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3011 if (F) { 3012 Mat B; 3013 3014 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3015 if (need_benign_correction) { 3016 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3017 3018 /* rhs is already zero on interior dofs, no need to change the rhs */ 3019 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3020 } 3021 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3022 if (need_benign_correction) { 3023 PetscScalar *marr; 3024 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3025 3026 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3027 if (lda_rhs != n_R) { 3028 for (i=0;i<n_constraints;i++) { 3029 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3030 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3031 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3032 } 3033 } else { 3034 for (i=0;i<n_constraints;i++) { 3035 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3036 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3037 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3038 } 3039 } 3040 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3041 } 3042 ierr = MatDestroy(&B);CHKERRQ(ierr); 3043 } else { 3044 PetscScalar *marr; 3045 3046 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3047 for (i=0;i<n_constraints;i++) { 3048 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3049 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3050 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3051 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3052 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3053 } 3054 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3055 } 3056 if (!pcbddc->switch_static) { 3057 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3058 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3059 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3060 for (i=0;i<n_constraints;i++) { 3061 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3062 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3063 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3064 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3065 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3066 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3067 } 3068 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3069 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3070 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3071 } else { 3072 if (lda_rhs != n_R) { 3073 IS dummy; 3074 3075 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3076 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3077 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3078 } else { 3079 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3080 pcbddc->local_auxmat2 = local_auxmat2_R; 3081 } 3082 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3083 } 3084 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3085 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3086 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3087 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3088 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3089 if (isCHOL) { 3090 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3091 } else { 3092 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3093 } 3094 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3095 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3096 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3097 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3098 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3099 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3100 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3101 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3102 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3103 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3104 } 3105 3106 /* Get submatrices from subdomain matrix */ 3107 if (n_vertices) { 3108 IS is_aux; 3109 3110 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3111 IS tis; 3112 3113 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3114 ierr = ISSort(tis);CHKERRQ(ierr); 3115 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3116 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3117 } else { 3118 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3119 } 3120 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3121 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3122 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3123 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3124 } 3125 3126 /* Matrix of coarse basis functions (local) */ 3127 if (pcbddc->coarse_phi_B) { 3128 PetscInt on_B,on_primal,on_D=n_D; 3129 if (pcbddc->coarse_phi_D) { 3130 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3131 } 3132 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3133 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3134 PetscScalar *marray; 3135 3136 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3137 ierr = PetscFree(marray);CHKERRQ(ierr); 3138 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3139 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3140 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3141 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3142 } 3143 } 3144 3145 if (!pcbddc->coarse_phi_B) { 3146 PetscScalar *marray; 3147 3148 n = n_B*pcbddc->local_primal_size; 3149 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3150 n += n_D*pcbddc->local_primal_size; 3151 } 3152 if (!pcbddc->symmetric_primal) { 3153 n *= 2; 3154 } 3155 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3156 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3157 n = n_B*pcbddc->local_primal_size; 3158 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3159 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3160 n += n_D*pcbddc->local_primal_size; 3161 } 3162 if (!pcbddc->symmetric_primal) { 3163 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3164 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3165 n = n_B*pcbddc->local_primal_size; 3166 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3167 } 3168 } else { 3169 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3170 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3171 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3172 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3173 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3174 } 3175 } 3176 } 3177 3178 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3179 p0_lidx_I = NULL; 3180 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3181 const PetscInt *idxs; 3182 3183 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3184 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3185 for (i=0;i<pcbddc->benign_n;i++) { 3186 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3187 } 3188 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3189 } 3190 3191 /* vertices */ 3192 if (n_vertices) { 3193 3194 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3195 3196 if (n_R) { 3197 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3198 PetscBLASInt B_N,B_one = 1; 3199 PetscScalar *x,*y; 3200 PetscBool isseqaij; 3201 3202 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3203 if (need_benign_correction) { 3204 ISLocalToGlobalMapping RtoN; 3205 IS is_p0; 3206 PetscInt *idxs_p0,n; 3207 3208 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3209 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3210 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3211 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3212 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3213 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3214 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3215 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3216 } 3217 3218 if (lda_rhs == n_R) { 3219 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3220 } else { 3221 PetscScalar *av,*array; 3222 const PetscInt *xadj,*adjncy; 3223 PetscInt n; 3224 PetscBool flg_row; 3225 3226 array = work+lda_rhs*n_vertices; 3227 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3228 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3229 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3230 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3231 for (i=0;i<n;i++) { 3232 PetscInt j; 3233 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3234 } 3235 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3236 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3237 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3238 } 3239 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3240 if (need_benign_correction) { 3241 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3242 PetscScalar *marr; 3243 3244 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3245 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3246 3247 | 0 0 0 | (V) 3248 L = | 0 0 -1 | (P-p0) 3249 | 0 0 -1 | (p0) 3250 3251 */ 3252 for (i=0;i<reuse_solver->benign_n;i++) { 3253 const PetscScalar *vals; 3254 const PetscInt *idxs,*idxs_zero; 3255 PetscInt n,j,nz; 3256 3257 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3258 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3259 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3260 for (j=0;j<n;j++) { 3261 PetscScalar val = vals[j]; 3262 PetscInt k,col = idxs[j]; 3263 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3264 } 3265 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3266 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3267 } 3268 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3269 } 3270 if (F) { 3271 /* need to correct the rhs */ 3272 if (need_benign_correction) { 3273 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3274 PetscScalar *marr; 3275 3276 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3277 if (lda_rhs != n_R) { 3278 for (i=0;i<n_vertices;i++) { 3279 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3280 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3281 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3282 } 3283 } else { 3284 for (i=0;i<n_vertices;i++) { 3285 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3286 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3287 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3288 } 3289 } 3290 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3291 } 3292 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3293 /* need to correct the solution */ 3294 if (need_benign_correction) { 3295 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3296 PetscScalar *marr; 3297 3298 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3299 if (lda_rhs != n_R) { 3300 for (i=0;i<n_vertices;i++) { 3301 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3302 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3303 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3304 } 3305 } else { 3306 for (i=0;i<n_vertices;i++) { 3307 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3308 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3309 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3310 } 3311 } 3312 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3313 } 3314 } else { 3315 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3316 for (i=0;i<n_vertices;i++) { 3317 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3318 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3319 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3320 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3321 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3322 } 3323 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3324 } 3325 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3326 /* S_VV and S_CV */ 3327 if (n_constraints) { 3328 Mat B; 3329 3330 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3331 for (i=0;i<n_vertices;i++) { 3332 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3333 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3334 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3335 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3336 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3337 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3338 } 3339 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3340 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3341 ierr = MatDestroy(&B);CHKERRQ(ierr); 3342 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3343 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3344 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3345 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3346 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3347 ierr = MatDestroy(&B);CHKERRQ(ierr); 3348 } 3349 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3350 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3351 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3352 } 3353 if (lda_rhs != n_R) { 3354 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3355 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3356 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3357 } 3358 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3359 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3360 if (need_benign_correction) { 3361 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3362 PetscScalar *marr,*sums; 3363 3364 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3365 ierr = MatDenseGetArray(S_VVt,&marr); 3366 for (i=0;i<reuse_solver->benign_n;i++) { 3367 const PetscScalar *vals; 3368 const PetscInt *idxs,*idxs_zero; 3369 PetscInt n,j,nz; 3370 3371 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3372 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3373 for (j=0;j<n_vertices;j++) { 3374 PetscInt k; 3375 sums[j] = 0.; 3376 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3377 } 3378 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3379 for (j=0;j<n;j++) { 3380 PetscScalar val = vals[j]; 3381 PetscInt k; 3382 for (k=0;k<n_vertices;k++) { 3383 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3384 } 3385 } 3386 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3387 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3388 } 3389 ierr = PetscFree(sums);CHKERRQ(ierr); 3390 ierr = MatDenseRestoreArray(S_VVt,&marr); 3391 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3392 } 3393 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3394 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3395 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3396 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3397 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3398 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3399 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3400 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3401 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3402 } else { 3403 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3404 } 3405 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3406 3407 /* coarse basis functions */ 3408 for (i=0;i<n_vertices;i++) { 3409 PetscScalar *y; 3410 3411 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3412 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3413 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3414 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3415 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3416 y[n_B*i+idx_V_B[i]] = 1.0; 3417 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3418 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3419 3420 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3421 PetscInt j; 3422 3423 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3424 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3425 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3426 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3427 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3428 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3429 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3430 } 3431 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3432 } 3433 /* if n_R == 0 the object is not destroyed */ 3434 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3435 } 3436 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3437 3438 if (n_constraints) { 3439 Mat B; 3440 3441 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3442 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3443 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3444 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3445 if (n_vertices) { 3446 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3447 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3448 } else { 3449 Mat S_VCt; 3450 3451 if (lda_rhs != n_R) { 3452 ierr = MatDestroy(&B);CHKERRQ(ierr); 3453 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3454 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3455 } 3456 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3457 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3458 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3459 } 3460 } 3461 ierr = MatDestroy(&B);CHKERRQ(ierr); 3462 /* coarse basis functions */ 3463 for (i=0;i<n_constraints;i++) { 3464 PetscScalar *y; 3465 3466 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3467 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3468 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3469 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3470 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3471 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3472 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3473 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3474 PetscInt j; 3475 3476 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3477 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3478 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3479 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3480 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3481 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3482 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3483 } 3484 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3485 } 3486 } 3487 if (n_constraints) { 3488 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3489 } 3490 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3491 3492 /* coarse matrix entries relative to B_0 */ 3493 if (pcbddc->benign_n) { 3494 Mat B0_B,B0_BPHI; 3495 IS is_dummy; 3496 PetscScalar *data; 3497 PetscInt j; 3498 3499 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3500 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3501 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3502 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3503 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3504 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3505 for (j=0;j<pcbddc->benign_n;j++) { 3506 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3507 for (i=0;i<pcbddc->local_primal_size;i++) { 3508 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3509 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3510 } 3511 } 3512 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3513 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3514 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3515 } 3516 3517 /* compute other basis functions for non-symmetric problems */ 3518 if (!pcbddc->symmetric_primal) { 3519 Mat B_V=NULL,B_C=NULL; 3520 PetscScalar *marray; 3521 3522 if (n_constraints) { 3523 Mat S_CCT,C_CRT; 3524 3525 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3526 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3527 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3528 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3529 if (n_vertices) { 3530 Mat S_VCT; 3531 3532 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3533 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3534 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3535 } 3536 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3537 } else { 3538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3539 } 3540 if (n_vertices && n_R) { 3541 PetscScalar *av,*marray; 3542 const PetscInt *xadj,*adjncy; 3543 PetscInt n; 3544 PetscBool flg_row; 3545 3546 /* B_V = B_V - A_VR^T */ 3547 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3548 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3549 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3550 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3551 for (i=0;i<n;i++) { 3552 PetscInt j; 3553 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3554 } 3555 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3556 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3557 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3558 } 3559 3560 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3561 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3562 for (i=0;i<n_vertices;i++) { 3563 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3564 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3565 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3566 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3567 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3568 } 3569 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3570 if (B_C) { 3571 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3572 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3573 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3574 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3575 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3576 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3577 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3578 } 3579 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3580 } 3581 /* coarse basis functions */ 3582 for (i=0;i<pcbddc->local_primal_size;i++) { 3583 PetscScalar *y; 3584 3585 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3586 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3587 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3588 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3589 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3590 if (i<n_vertices) { 3591 y[n_B*i+idx_V_B[i]] = 1.0; 3592 } 3593 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3594 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3595 3596 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3597 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3598 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3599 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3600 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3601 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3602 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3603 } 3604 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3605 } 3606 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3607 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3608 } 3609 /* free memory */ 3610 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3611 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3612 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3613 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3614 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3615 ierr = PetscFree(work);CHKERRQ(ierr); 3616 if (n_vertices) { 3617 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3618 } 3619 if (n_constraints) { 3620 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3621 } 3622 /* Checking coarse_sub_mat and coarse basis functios */ 3623 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3624 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3625 if (pcbddc->dbg_flag) { 3626 Mat coarse_sub_mat; 3627 Mat AUXMAT,TM1,TM2,TM3,TM4; 3628 Mat coarse_phi_D,coarse_phi_B; 3629 Mat coarse_psi_D,coarse_psi_B; 3630 Mat A_II,A_BB,A_IB,A_BI; 3631 Mat C_B,CPHI; 3632 IS is_dummy; 3633 Vec mones; 3634 MatType checkmattype=MATSEQAIJ; 3635 PetscReal real_value; 3636 3637 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3638 Mat A; 3639 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3640 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3641 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3642 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3643 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3644 ierr = MatDestroy(&A);CHKERRQ(ierr); 3645 } else { 3646 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3647 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3648 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3649 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3650 } 3651 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3652 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3653 if (!pcbddc->symmetric_primal) { 3654 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3655 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3656 } 3657 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3658 3659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3660 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3661 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3662 if (!pcbddc->symmetric_primal) { 3663 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3664 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3665 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3666 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3667 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3668 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3669 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3670 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3671 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3672 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3673 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3674 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3675 } else { 3676 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3677 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3678 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3679 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3680 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3681 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3682 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3683 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3684 } 3685 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3686 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3687 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3688 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3689 if (pcbddc->benign_n) { 3690 Mat B0_B,B0_BPHI; 3691 PetscScalar *data,*data2; 3692 PetscInt j; 3693 3694 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3695 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3696 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3697 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3698 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3699 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3700 for (j=0;j<pcbddc->benign_n;j++) { 3701 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3702 for (i=0;i<pcbddc->local_primal_size;i++) { 3703 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3704 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3705 } 3706 } 3707 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3708 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3709 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3710 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3711 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3712 } 3713 #if 0 3714 { 3715 PetscViewer viewer; 3716 char filename[256]; 3717 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3718 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3719 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3720 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3721 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3722 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3723 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3724 if (save_change) { 3725 Mat phi_B; 3726 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3727 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3728 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3729 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3730 } else { 3731 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3732 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3733 } 3734 if (pcbddc->coarse_phi_D) { 3735 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3736 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3737 } 3738 if (pcbddc->coarse_psi_B) { 3739 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3740 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3741 } 3742 if (pcbddc->coarse_psi_D) { 3743 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3744 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3745 } 3746 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3747 } 3748 #endif 3749 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3750 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3751 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3752 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3753 3754 /* check constraints */ 3755 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3756 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3757 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3758 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3759 } else { 3760 PetscScalar *data; 3761 Mat tmat; 3762 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3764 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3765 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3766 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3767 } 3768 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3769 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3770 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3771 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3772 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3773 if (!pcbddc->symmetric_primal) { 3774 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3775 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3776 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3777 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3778 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3779 } 3780 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3781 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3782 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3783 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3784 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3785 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3786 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3787 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3788 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3789 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3790 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3791 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3792 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3793 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3794 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3795 if (!pcbddc->symmetric_primal) { 3796 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3797 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3798 } 3799 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3800 } 3801 /* get back data */ 3802 *coarse_submat_vals_n = coarse_submat_vals; 3803 PetscFunctionReturn(0); 3804 } 3805 3806 #undef __FUNCT__ 3807 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3808 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3809 { 3810 Mat *work_mat; 3811 IS isrow_s,iscol_s; 3812 PetscBool rsorted,csorted; 3813 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3814 PetscErrorCode ierr; 3815 3816 PetscFunctionBegin; 3817 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3818 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3819 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3820 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3821 3822 if (!rsorted) { 3823 const PetscInt *idxs; 3824 PetscInt *idxs_sorted,i; 3825 3826 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3827 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3828 for (i=0;i<rsize;i++) { 3829 idxs_perm_r[i] = i; 3830 } 3831 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3832 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3833 for (i=0;i<rsize;i++) { 3834 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3835 } 3836 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3837 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3838 } else { 3839 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3840 isrow_s = isrow; 3841 } 3842 3843 if (!csorted) { 3844 if (isrow == iscol) { 3845 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3846 iscol_s = isrow_s; 3847 } else { 3848 const PetscInt *idxs; 3849 PetscInt *idxs_sorted,i; 3850 3851 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3852 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3853 for (i=0;i<csize;i++) { 3854 idxs_perm_c[i] = i; 3855 } 3856 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3857 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3858 for (i=0;i<csize;i++) { 3859 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3860 } 3861 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3862 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3863 } 3864 } else { 3865 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3866 iscol_s = iscol; 3867 } 3868 3869 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3870 3871 if (!rsorted || !csorted) { 3872 Mat new_mat; 3873 IS is_perm_r,is_perm_c; 3874 3875 if (!rsorted) { 3876 PetscInt *idxs_r,i; 3877 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3878 for (i=0;i<rsize;i++) { 3879 idxs_r[idxs_perm_r[i]] = i; 3880 } 3881 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3882 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3883 } else { 3884 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3885 } 3886 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3887 3888 if (!csorted) { 3889 if (isrow_s == iscol_s) { 3890 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3891 is_perm_c = is_perm_r; 3892 } else { 3893 PetscInt *idxs_c,i; 3894 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3895 for (i=0;i<csize;i++) { 3896 idxs_c[idxs_perm_c[i]] = i; 3897 } 3898 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3899 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3900 } 3901 } else { 3902 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3903 } 3904 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3905 3906 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3907 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3908 work_mat[0] = new_mat; 3909 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3910 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3911 } 3912 3913 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3914 *B = work_mat[0]; 3915 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3916 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3917 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3918 PetscFunctionReturn(0); 3919 } 3920 3921 #undef __FUNCT__ 3922 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3923 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3924 { 3925 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3926 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3927 Mat new_mat; 3928 IS is_local,is_global; 3929 PetscInt local_size; 3930 PetscBool isseqaij; 3931 PetscErrorCode ierr; 3932 3933 PetscFunctionBegin; 3934 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3935 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3936 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3937 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3938 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3939 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3940 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3941 3942 /* check */ 3943 if (pcbddc->dbg_flag) { 3944 Vec x,x_change; 3945 PetscReal error; 3946 3947 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3948 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3949 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3950 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3951 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3952 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3953 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3954 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3955 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3956 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3957 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3958 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3959 ierr = VecDestroy(&x);CHKERRQ(ierr); 3960 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3961 } 3962 3963 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3964 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3965 if (isseqaij) { 3966 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3967 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3968 } else { 3969 Mat work_mat; 3970 3971 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3972 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3973 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3974 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3975 } 3976 if (matis->A->symmetric_set) { 3977 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3978 #if !defined(PETSC_USE_COMPLEX) 3979 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3980 #endif 3981 } 3982 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3983 PetscFunctionReturn(0); 3984 } 3985 3986 #undef __FUNCT__ 3987 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3988 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3989 { 3990 PC_IS* pcis = (PC_IS*)(pc->data); 3991 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3992 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3993 PetscInt *idx_R_local=NULL; 3994 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3995 PetscInt vbs,bs; 3996 PetscBT bitmask=NULL; 3997 PetscErrorCode ierr; 3998 3999 PetscFunctionBegin; 4000 /* 4001 No need to setup local scatters if 4002 - primal space is unchanged 4003 AND 4004 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4005 AND 4006 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4007 */ 4008 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4009 PetscFunctionReturn(0); 4010 } 4011 /* destroy old objects */ 4012 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4013 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4014 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4015 /* Set Non-overlapping dimensions */ 4016 n_B = pcis->n_B; 4017 n_D = pcis->n - n_B; 4018 n_vertices = pcbddc->n_vertices; 4019 4020 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4021 4022 /* create auxiliary bitmask and allocate workspace */ 4023 if (!sub_schurs || !sub_schurs->reuse_solver) { 4024 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4025 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4026 for (i=0;i<n_vertices;i++) { 4027 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4028 } 4029 4030 for (i=0, n_R=0; i<pcis->n; i++) { 4031 if (!PetscBTLookup(bitmask,i)) { 4032 idx_R_local[n_R++] = i; 4033 } 4034 } 4035 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4036 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4037 4038 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4039 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4040 } 4041 4042 /* Block code */ 4043 vbs = 1; 4044 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4045 if (bs>1 && !(n_vertices%bs)) { 4046 PetscBool is_blocked = PETSC_TRUE; 4047 PetscInt *vary; 4048 if (!sub_schurs || !sub_schurs->reuse_solver) { 4049 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4050 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4051 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4052 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4053 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4054 for (i=0; i<pcis->n/bs; i++) { 4055 if (vary[i]!=0 && vary[i]!=bs) { 4056 is_blocked = PETSC_FALSE; 4057 break; 4058 } 4059 } 4060 ierr = PetscFree(vary);CHKERRQ(ierr); 4061 } else { 4062 /* Verify directly the R set */ 4063 for (i=0; i<n_R/bs; i++) { 4064 PetscInt j,node=idx_R_local[bs*i]; 4065 for (j=1; j<bs; j++) { 4066 if (node != idx_R_local[bs*i+j]-j) { 4067 is_blocked = PETSC_FALSE; 4068 break; 4069 } 4070 } 4071 } 4072 } 4073 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4074 vbs = bs; 4075 for (i=0;i<n_R/vbs;i++) { 4076 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4077 } 4078 } 4079 } 4080 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4081 if (sub_schurs && sub_schurs->reuse_solver) { 4082 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4083 4084 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4085 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4086 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4087 reuse_solver->is_R = pcbddc->is_R_local; 4088 } else { 4089 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4090 } 4091 4092 /* print some info if requested */ 4093 if (pcbddc->dbg_flag) { 4094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4095 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4096 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4098 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4099 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4100 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4101 } 4102 4103 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4104 if (!sub_schurs || !sub_schurs->reuse_solver) { 4105 IS is_aux1,is_aux2; 4106 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4107 4108 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4109 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4110 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4111 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4112 for (i=0; i<n_D; i++) { 4113 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4114 } 4115 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4116 for (i=0, j=0; i<n_R; i++) { 4117 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4118 aux_array1[j++] = i; 4119 } 4120 } 4121 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4122 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4123 for (i=0, j=0; i<n_B; i++) { 4124 if (!PetscBTLookup(bitmask,is_indices[i])) { 4125 aux_array2[j++] = i; 4126 } 4127 } 4128 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4129 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4130 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4131 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4132 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4133 4134 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4135 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4136 for (i=0, j=0; i<n_R; i++) { 4137 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4138 aux_array1[j++] = i; 4139 } 4140 } 4141 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4142 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4143 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4144 } 4145 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4146 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4147 } else { 4148 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4149 IS tis; 4150 PetscInt schur_size; 4151 4152 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4153 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4154 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4155 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4156 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4157 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4158 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4159 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4160 } 4161 } 4162 PetscFunctionReturn(0); 4163 } 4164 4165 4166 #undef __FUNCT__ 4167 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4168 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4169 { 4170 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4171 PC_IS *pcis = (PC_IS*)pc->data; 4172 PC pc_temp; 4173 Mat A_RR; 4174 MatReuse reuse; 4175 PetscScalar m_one = -1.0; 4176 PetscReal value; 4177 PetscInt n_D,n_R; 4178 PetscBool check_corr[2],issbaij; 4179 PetscErrorCode ierr; 4180 /* prefixes stuff */ 4181 char dir_prefix[256],neu_prefix[256],str_level[16]; 4182 size_t len; 4183 4184 PetscFunctionBegin; 4185 4186 /* compute prefixes */ 4187 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4188 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4189 if (!pcbddc->current_level) { 4190 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4191 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4192 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4193 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4194 } else { 4195 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4196 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4197 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4198 len -= 15; /* remove "pc_bddc_coarse_" */ 4199 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4200 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4201 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4202 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4203 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4204 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4205 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4206 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4207 } 4208 4209 /* DIRICHLET PROBLEM */ 4210 if (dirichlet) { 4211 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4212 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4213 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4214 if (pcbddc->dbg_flag) { 4215 Mat A_IIn; 4216 4217 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4218 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4219 pcis->A_II = A_IIn; 4220 } 4221 } 4222 if (pcbddc->local_mat->symmetric_set) { 4223 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4224 } 4225 /* Matrix for Dirichlet problem is pcis->A_II */ 4226 n_D = pcis->n - pcis->n_B; 4227 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4228 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4229 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4230 /* default */ 4231 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4232 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4233 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4234 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4235 if (issbaij) { 4236 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4237 } else { 4238 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4239 } 4240 /* Allow user's customization */ 4241 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4242 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4243 } 4244 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4245 if (sub_schurs && sub_schurs->reuse_solver) { 4246 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4247 4248 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4249 } 4250 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4251 if (!n_D) { 4252 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4253 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4254 } 4255 /* Set Up KSP for Dirichlet problem of BDDC */ 4256 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4257 /* set ksp_D into pcis data */ 4258 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4259 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4260 pcis->ksp_D = pcbddc->ksp_D; 4261 } 4262 4263 /* NEUMANN PROBLEM */ 4264 A_RR = 0; 4265 if (neumann) { 4266 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4267 PetscInt ibs,mbs; 4268 PetscBool issbaij; 4269 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4270 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4271 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4272 if (pcbddc->ksp_R) { /* already created ksp */ 4273 PetscInt nn_R; 4274 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4275 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4276 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4277 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4278 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4279 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4280 reuse = MAT_INITIAL_MATRIX; 4281 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4282 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4283 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4284 reuse = MAT_INITIAL_MATRIX; 4285 } else { /* safe to reuse the matrix */ 4286 reuse = MAT_REUSE_MATRIX; 4287 } 4288 } 4289 /* last check */ 4290 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4291 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4292 reuse = MAT_INITIAL_MATRIX; 4293 } 4294 } else { /* first time, so we need to create the matrix */ 4295 reuse = MAT_INITIAL_MATRIX; 4296 } 4297 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4298 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4299 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4300 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4301 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4302 if (matis->A == pcbddc->local_mat) { 4303 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4304 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4305 } else { 4306 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4307 } 4308 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4309 if (matis->A == pcbddc->local_mat) { 4310 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4311 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4312 } else { 4313 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4314 } 4315 } 4316 /* extract A_RR */ 4317 if (sub_schurs && sub_schurs->reuse_solver) { 4318 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4319 4320 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4321 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4322 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4323 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4324 } else { 4325 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4326 } 4327 } else { 4328 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4329 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4330 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4331 } 4332 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4333 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4334 } 4335 if (pcbddc->local_mat->symmetric_set) { 4336 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4337 } 4338 if (!pcbddc->ksp_R) { /* create object if not present */ 4339 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4340 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4341 /* default */ 4342 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4343 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4344 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4345 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4346 if (issbaij) { 4347 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4348 } else { 4349 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4350 } 4351 /* Allow user's customization */ 4352 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4353 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4354 } 4355 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4356 if (!n_R) { 4357 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4358 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4359 } 4360 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4361 /* Reuse solver if it is present */ 4362 if (sub_schurs && sub_schurs->reuse_solver) { 4363 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4364 4365 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4366 } 4367 /* Set Up KSP for Neumann problem of BDDC */ 4368 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4369 } 4370 4371 if (pcbddc->dbg_flag) { 4372 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4373 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4374 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4375 } 4376 4377 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4378 check_corr[0] = check_corr[1] = PETSC_FALSE; 4379 if (pcbddc->NullSpace_corr[0]) { 4380 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4381 } 4382 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4383 check_corr[0] = PETSC_TRUE; 4384 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4385 } 4386 if (neumann && pcbddc->NullSpace_corr[2]) { 4387 check_corr[1] = PETSC_TRUE; 4388 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4389 } 4390 4391 /* check Dirichlet and Neumann solvers */ 4392 if (pcbddc->dbg_flag) { 4393 if (dirichlet) { /* Dirichlet */ 4394 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4395 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4396 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4397 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4398 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4399 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4400 if (check_corr[0]) { 4401 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4402 } 4403 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4404 } 4405 if (neumann) { /* Neumann */ 4406 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4407 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4408 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4409 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4410 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4412 if (check_corr[1]) { 4413 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4414 } 4415 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4416 } 4417 } 4418 /* free Neumann problem's matrix */ 4419 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4420 PetscFunctionReturn(0); 4421 } 4422 4423 #undef __FUNCT__ 4424 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4425 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4426 { 4427 PetscErrorCode ierr; 4428 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4429 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4430 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4431 4432 PetscFunctionBegin; 4433 if (!reuse_solver) { 4434 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4435 } 4436 if (!pcbddc->switch_static) { 4437 if (applytranspose && pcbddc->local_auxmat1) { 4438 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4439 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4440 } 4441 if (!reuse_solver) { 4442 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4443 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4444 } else { 4445 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4446 4447 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4448 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4449 } 4450 } else { 4451 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4452 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4453 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4454 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4455 if (applytranspose && pcbddc->local_auxmat1) { 4456 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4457 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4458 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4459 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4460 } 4461 } 4462 if (!reuse_solver || pcbddc->switch_static) { 4463 if (applytranspose) { 4464 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4465 } else { 4466 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4467 } 4468 } else { 4469 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4470 4471 if (applytranspose) { 4472 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4473 } else { 4474 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4475 } 4476 } 4477 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4478 if (!pcbddc->switch_static) { 4479 if (!reuse_solver) { 4480 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4481 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4482 } else { 4483 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4484 4485 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4486 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4487 } 4488 if (!applytranspose && pcbddc->local_auxmat1) { 4489 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4490 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4491 } 4492 } else { 4493 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4494 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4495 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4496 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4497 if (!applytranspose && pcbddc->local_auxmat1) { 4498 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4499 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4500 } 4501 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4502 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4503 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4504 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4505 } 4506 PetscFunctionReturn(0); 4507 } 4508 4509 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4510 #undef __FUNCT__ 4511 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4512 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4513 { 4514 PetscErrorCode ierr; 4515 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4516 PC_IS* pcis = (PC_IS*) (pc->data); 4517 const PetscScalar zero = 0.0; 4518 4519 PetscFunctionBegin; 4520 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4521 if (!pcbddc->benign_apply_coarse_only) { 4522 if (applytranspose) { 4523 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4524 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4525 } else { 4526 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4527 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4528 } 4529 } else { 4530 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4531 } 4532 4533 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4534 if (pcbddc->benign_n) { 4535 PetscScalar *array; 4536 PetscInt j; 4537 4538 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4539 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4540 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4541 } 4542 4543 /* start communications from local primal nodes to rhs of coarse solver */ 4544 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4545 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4546 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4547 4548 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4549 if (pcbddc->coarse_ksp) { 4550 Mat coarse_mat; 4551 Vec rhs,sol; 4552 MatNullSpace nullsp; 4553 PetscBool isbddc = PETSC_FALSE; 4554 4555 if (pcbddc->benign_have_null) { 4556 PC coarse_pc; 4557 4558 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4559 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4560 /* we need to propagate to coarser levels the need for a possible benign correction */ 4561 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4562 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4563 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4564 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4565 } 4566 } 4567 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4568 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4569 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4570 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4571 if (nullsp) { 4572 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4573 } 4574 if (applytranspose) { 4575 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4576 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4577 } else { 4578 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4579 PC coarse_pc; 4580 4581 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4582 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4583 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4584 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4585 } else { 4586 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4587 } 4588 } 4589 /* we don't need the benign correction at coarser levels anymore */ 4590 if (pcbddc->benign_have_null && isbddc) { 4591 PC coarse_pc; 4592 PC_BDDC* coarsepcbddc; 4593 4594 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4595 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4596 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4597 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4598 } 4599 if (nullsp) { 4600 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4601 } 4602 } 4603 4604 /* Local solution on R nodes */ 4605 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4606 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4607 } 4608 /* communications from coarse sol to local primal nodes */ 4609 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4610 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4611 4612 /* Sum contributions from the two levels */ 4613 if (!pcbddc->benign_apply_coarse_only) { 4614 if (applytranspose) { 4615 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4616 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4617 } else { 4618 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4619 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4620 } 4621 /* store p0 */ 4622 if (pcbddc->benign_n) { 4623 PetscScalar *array; 4624 PetscInt j; 4625 4626 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4627 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4628 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4629 } 4630 } else { /* expand the coarse solution */ 4631 if (applytranspose) { 4632 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4633 } else { 4634 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4635 } 4636 } 4637 PetscFunctionReturn(0); 4638 } 4639 4640 #undef __FUNCT__ 4641 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4642 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4643 { 4644 PetscErrorCode ierr; 4645 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4646 PetscScalar *array; 4647 Vec from,to; 4648 4649 PetscFunctionBegin; 4650 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4651 from = pcbddc->coarse_vec; 4652 to = pcbddc->vec1_P; 4653 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4654 Vec tvec; 4655 4656 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4657 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4658 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4659 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4660 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4661 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4662 } 4663 } else { /* from local to global -> put data in coarse right hand side */ 4664 from = pcbddc->vec1_P; 4665 to = pcbddc->coarse_vec; 4666 } 4667 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4668 PetscFunctionReturn(0); 4669 } 4670 4671 #undef __FUNCT__ 4672 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4673 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4674 { 4675 PetscErrorCode ierr; 4676 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4677 PetscScalar *array; 4678 Vec from,to; 4679 4680 PetscFunctionBegin; 4681 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4682 from = pcbddc->coarse_vec; 4683 to = pcbddc->vec1_P; 4684 } else { /* from local to global -> put data in coarse right hand side */ 4685 from = pcbddc->vec1_P; 4686 to = pcbddc->coarse_vec; 4687 } 4688 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4689 if (smode == SCATTER_FORWARD) { 4690 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4691 Vec tvec; 4692 4693 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4694 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4695 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4696 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4697 } 4698 } else { 4699 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4700 ierr = VecResetArray(from);CHKERRQ(ierr); 4701 } 4702 } 4703 PetscFunctionReturn(0); 4704 } 4705 4706 /* uncomment for testing purposes */ 4707 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4708 #undef __FUNCT__ 4709 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4710 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4711 { 4712 PetscErrorCode ierr; 4713 PC_IS* pcis = (PC_IS*)(pc->data); 4714 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4715 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4716 /* one and zero */ 4717 PetscScalar one=1.0,zero=0.0; 4718 /* space to store constraints and their local indices */ 4719 PetscScalar *constraints_data; 4720 PetscInt *constraints_idxs,*constraints_idxs_B; 4721 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4722 PetscInt *constraints_n; 4723 /* iterators */ 4724 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4725 /* BLAS integers */ 4726 PetscBLASInt lwork,lierr; 4727 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4728 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4729 /* reuse */ 4730 PetscInt olocal_primal_size,olocal_primal_size_cc; 4731 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4732 /* change of basis */ 4733 PetscBool qr_needed; 4734 PetscBT change_basis,qr_needed_idx; 4735 /* auxiliary stuff */ 4736 PetscInt *nnz,*is_indices; 4737 PetscInt ncc; 4738 /* some quantities */ 4739 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4740 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4741 4742 PetscFunctionBegin; 4743 /* Destroy Mat objects computed previously */ 4744 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4745 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4746 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4747 /* save info on constraints from previous setup (if any) */ 4748 olocal_primal_size = pcbddc->local_primal_size; 4749 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4750 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4751 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4752 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4753 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4754 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4755 4756 if (!pcbddc->adaptive_selection) { 4757 IS ISForVertices,*ISForFaces,*ISForEdges; 4758 MatNullSpace nearnullsp; 4759 const Vec *nearnullvecs; 4760 Vec *localnearnullsp; 4761 PetscScalar *array; 4762 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4763 PetscBool nnsp_has_cnst; 4764 /* LAPACK working arrays for SVD or POD */ 4765 PetscBool skip_lapack,boolforchange; 4766 PetscScalar *work; 4767 PetscReal *singular_vals; 4768 #if defined(PETSC_USE_COMPLEX) 4769 PetscReal *rwork; 4770 #endif 4771 #if defined(PETSC_MISSING_LAPACK_GESVD) 4772 PetscScalar *temp_basis,*correlation_mat; 4773 #else 4774 PetscBLASInt dummy_int=1; 4775 PetscScalar dummy_scalar=1.; 4776 #endif 4777 4778 /* Get index sets for faces, edges and vertices from graph */ 4779 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4780 /* print some info */ 4781 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4782 PetscInt nv; 4783 4784 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4785 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4786 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4787 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4790 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4791 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4792 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4793 } 4794 4795 /* free unneeded index sets */ 4796 if (!pcbddc->use_vertices) { 4797 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4798 } 4799 if (!pcbddc->use_edges) { 4800 for (i=0;i<n_ISForEdges;i++) { 4801 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4802 } 4803 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4804 n_ISForEdges = 0; 4805 } 4806 if (!pcbddc->use_faces) { 4807 for (i=0;i<n_ISForFaces;i++) { 4808 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4809 } 4810 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4811 n_ISForFaces = 0; 4812 } 4813 4814 /* check if near null space is attached to global mat */ 4815 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4816 if (nearnullsp) { 4817 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4818 /* remove any stored info */ 4819 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4820 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4821 /* store information for BDDC solver reuse */ 4822 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4823 pcbddc->onearnullspace = nearnullsp; 4824 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4825 for (i=0;i<nnsp_size;i++) { 4826 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4827 } 4828 } else { /* if near null space is not provided BDDC uses constants by default */ 4829 nnsp_size = 0; 4830 nnsp_has_cnst = PETSC_TRUE; 4831 } 4832 /* get max number of constraints on a single cc */ 4833 max_constraints = nnsp_size; 4834 if (nnsp_has_cnst) max_constraints++; 4835 4836 /* 4837 Evaluate maximum storage size needed by the procedure 4838 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4839 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4840 There can be multiple constraints per connected component 4841 */ 4842 n_vertices = 0; 4843 if (ISForVertices) { 4844 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4845 } 4846 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4847 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4848 4849 total_counts = n_ISForFaces+n_ISForEdges; 4850 total_counts *= max_constraints; 4851 total_counts += n_vertices; 4852 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4853 4854 total_counts = 0; 4855 max_size_of_constraint = 0; 4856 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4857 IS used_is; 4858 if (i<n_ISForEdges) { 4859 used_is = ISForEdges[i]; 4860 } else { 4861 used_is = ISForFaces[i-n_ISForEdges]; 4862 } 4863 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4864 total_counts += j; 4865 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4866 } 4867 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4868 4869 /* get local part of global near null space vectors */ 4870 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4871 for (k=0;k<nnsp_size;k++) { 4872 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4873 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4874 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4875 } 4876 4877 /* whether or not to skip lapack calls */ 4878 skip_lapack = PETSC_TRUE; 4879 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4880 4881 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4882 if (!skip_lapack) { 4883 PetscScalar temp_work; 4884 4885 #if defined(PETSC_MISSING_LAPACK_GESVD) 4886 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4887 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4888 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4889 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4890 #if defined(PETSC_USE_COMPLEX) 4891 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4892 #endif 4893 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4894 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4895 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4896 lwork = -1; 4897 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4898 #if !defined(PETSC_USE_COMPLEX) 4899 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4900 #else 4901 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4902 #endif 4903 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4904 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4905 #else /* on missing GESVD */ 4906 /* SVD */ 4907 PetscInt max_n,min_n; 4908 max_n = max_size_of_constraint; 4909 min_n = max_constraints; 4910 if (max_size_of_constraint < max_constraints) { 4911 min_n = max_size_of_constraint; 4912 max_n = max_constraints; 4913 } 4914 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4915 #if defined(PETSC_USE_COMPLEX) 4916 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4917 #endif 4918 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4919 lwork = -1; 4920 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4921 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4922 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4923 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4924 #if !defined(PETSC_USE_COMPLEX) 4925 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4926 #else 4927 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4928 #endif 4929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4930 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4931 #endif /* on missing GESVD */ 4932 /* Allocate optimal workspace */ 4933 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4934 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4935 } 4936 /* Now we can loop on constraining sets */ 4937 total_counts = 0; 4938 constraints_idxs_ptr[0] = 0; 4939 constraints_data_ptr[0] = 0; 4940 /* vertices */ 4941 if (n_vertices) { 4942 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4943 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4944 for (i=0;i<n_vertices;i++) { 4945 constraints_n[total_counts] = 1; 4946 constraints_data[total_counts] = 1.0; 4947 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4948 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4949 total_counts++; 4950 } 4951 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4952 n_vertices = total_counts; 4953 } 4954 4955 /* edges and faces */ 4956 total_counts_cc = total_counts; 4957 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4958 IS used_is; 4959 PetscBool idxs_copied = PETSC_FALSE; 4960 4961 if (ncc<n_ISForEdges) { 4962 used_is = ISForEdges[ncc]; 4963 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4964 } else { 4965 used_is = ISForFaces[ncc-n_ISForEdges]; 4966 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4967 } 4968 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4969 4970 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4971 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4972 /* change of basis should not be performed on local periodic nodes */ 4973 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4974 if (nnsp_has_cnst) { 4975 PetscScalar quad_value; 4976 4977 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4978 idxs_copied = PETSC_TRUE; 4979 4980 if (!pcbddc->use_nnsp_true) { 4981 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4982 } else { 4983 quad_value = 1.0; 4984 } 4985 for (j=0;j<size_of_constraint;j++) { 4986 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4987 } 4988 temp_constraints++; 4989 total_counts++; 4990 } 4991 for (k=0;k<nnsp_size;k++) { 4992 PetscReal real_value; 4993 PetscScalar *ptr_to_data; 4994 4995 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4996 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4997 for (j=0;j<size_of_constraint;j++) { 4998 ptr_to_data[j] = array[is_indices[j]]; 4999 } 5000 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5001 /* check if array is null on the connected component */ 5002 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5003 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5004 if (real_value > 0.0) { /* keep indices and values */ 5005 temp_constraints++; 5006 total_counts++; 5007 if (!idxs_copied) { 5008 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5009 idxs_copied = PETSC_TRUE; 5010 } 5011 } 5012 } 5013 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5014 valid_constraints = temp_constraints; 5015 if (!pcbddc->use_nnsp_true && temp_constraints) { 5016 if (temp_constraints == 1) { /* just normalize the constraint */ 5017 PetscScalar norm,*ptr_to_data; 5018 5019 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5020 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5021 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5022 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5023 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5024 } else { /* perform SVD */ 5025 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5026 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5027 5028 #if defined(PETSC_MISSING_LAPACK_GESVD) 5029 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5030 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5031 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5032 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5033 from that computed using LAPACKgesvd 5034 -> This is due to a different computation of eigenvectors in LAPACKheev 5035 -> The quality of the POD-computed basis will be the same */ 5036 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5037 /* Store upper triangular part of correlation matrix */ 5038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5039 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5040 for (j=0;j<temp_constraints;j++) { 5041 for (k=0;k<j+1;k++) { 5042 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5043 } 5044 } 5045 /* compute eigenvalues and eigenvectors of correlation matrix */ 5046 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5047 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5048 #if !defined(PETSC_USE_COMPLEX) 5049 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5050 #else 5051 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5052 #endif 5053 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5054 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5055 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5056 j = 0; 5057 while (j < temp_constraints && singular_vals[j] < tol) j++; 5058 total_counts = total_counts-j; 5059 valid_constraints = temp_constraints-j; 5060 /* scale and copy POD basis into used quadrature memory */ 5061 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5062 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5063 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5064 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5065 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5066 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5067 if (j<temp_constraints) { 5068 PetscInt ii; 5069 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5070 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5071 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5072 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5073 for (k=0;k<temp_constraints-j;k++) { 5074 for (ii=0;ii<size_of_constraint;ii++) { 5075 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5076 } 5077 } 5078 } 5079 #else /* on missing GESVD */ 5080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5081 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5082 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5083 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5084 #if !defined(PETSC_USE_COMPLEX) 5085 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5086 #else 5087 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5088 #endif 5089 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5090 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5091 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5092 k = temp_constraints; 5093 if (k > size_of_constraint) k = size_of_constraint; 5094 j = 0; 5095 while (j < k && singular_vals[k-j-1] < tol) j++; 5096 valid_constraints = k-j; 5097 total_counts = total_counts-temp_constraints+valid_constraints; 5098 #endif /* on missing GESVD */ 5099 } 5100 } 5101 /* update pointers information */ 5102 if (valid_constraints) { 5103 constraints_n[total_counts_cc] = valid_constraints; 5104 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5105 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5106 /* set change_of_basis flag */ 5107 if (boolforchange) { 5108 PetscBTSet(change_basis,total_counts_cc); 5109 } 5110 total_counts_cc++; 5111 } 5112 } 5113 /* free workspace */ 5114 if (!skip_lapack) { 5115 ierr = PetscFree(work);CHKERRQ(ierr); 5116 #if defined(PETSC_USE_COMPLEX) 5117 ierr = PetscFree(rwork);CHKERRQ(ierr); 5118 #endif 5119 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5120 #if defined(PETSC_MISSING_LAPACK_GESVD) 5121 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5122 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5123 #endif 5124 } 5125 for (k=0;k<nnsp_size;k++) { 5126 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5127 } 5128 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5129 /* free index sets of faces, edges and vertices */ 5130 for (i=0;i<n_ISForFaces;i++) { 5131 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5132 } 5133 if (n_ISForFaces) { 5134 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5135 } 5136 for (i=0;i<n_ISForEdges;i++) { 5137 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5138 } 5139 if (n_ISForEdges) { 5140 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5141 } 5142 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5143 } else { 5144 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5145 5146 total_counts = 0; 5147 n_vertices = 0; 5148 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5149 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5150 } 5151 max_constraints = 0; 5152 total_counts_cc = 0; 5153 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5154 total_counts += pcbddc->adaptive_constraints_n[i]; 5155 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5156 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5157 } 5158 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5159 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5160 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5161 constraints_data = pcbddc->adaptive_constraints_data; 5162 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5163 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5164 total_counts_cc = 0; 5165 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5166 if (pcbddc->adaptive_constraints_n[i]) { 5167 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5168 } 5169 } 5170 #if 0 5171 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5172 for (i=0;i<total_counts_cc;i++) { 5173 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5174 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5175 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5176 printf(" %d",constraints_idxs[j]); 5177 } 5178 printf("\n"); 5179 printf("number of cc: %d\n",constraints_n[i]); 5180 } 5181 for (i=0;i<n_vertices;i++) { 5182 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5183 } 5184 for (i=0;i<sub_schurs->n_subs;i++) { 5185 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5186 } 5187 #endif 5188 5189 max_size_of_constraint = 0; 5190 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5191 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5192 /* Change of basis */ 5193 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5194 if (pcbddc->use_change_of_basis) { 5195 for (i=0;i<sub_schurs->n_subs;i++) { 5196 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5197 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5198 } 5199 } 5200 } 5201 } 5202 pcbddc->local_primal_size = total_counts; 5203 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5204 5205 /* map constraints_idxs in boundary numbering */ 5206 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5207 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5208 5209 /* Create constraint matrix */ 5210 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5211 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5212 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5213 5214 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5215 /* determine if a QR strategy is needed for change of basis */ 5216 qr_needed = PETSC_FALSE; 5217 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5218 total_primal_vertices=0; 5219 pcbddc->local_primal_size_cc = 0; 5220 for (i=0;i<total_counts_cc;i++) { 5221 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5222 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5223 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5224 pcbddc->local_primal_size_cc += 1; 5225 } else if (PetscBTLookup(change_basis,i)) { 5226 for (k=0;k<constraints_n[i];k++) { 5227 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5228 } 5229 pcbddc->local_primal_size_cc += constraints_n[i]; 5230 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5231 PetscBTSet(qr_needed_idx,i); 5232 qr_needed = PETSC_TRUE; 5233 } 5234 } else { 5235 pcbddc->local_primal_size_cc += 1; 5236 } 5237 } 5238 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5239 pcbddc->n_vertices = total_primal_vertices; 5240 /* permute indices in order to have a sorted set of vertices */ 5241 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5242 5243 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5244 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5245 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5246 5247 /* nonzero structure of constraint matrix */ 5248 /* and get reference dof for local constraints */ 5249 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5250 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5251 5252 j = total_primal_vertices; 5253 total_counts = total_primal_vertices; 5254 cum = total_primal_vertices; 5255 for (i=n_vertices;i<total_counts_cc;i++) { 5256 if (!PetscBTLookup(change_basis,i)) { 5257 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5258 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5259 cum++; 5260 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5261 for (k=0;k<constraints_n[i];k++) { 5262 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5263 nnz[j+k] = size_of_constraint; 5264 } 5265 j += constraints_n[i]; 5266 } 5267 } 5268 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5269 ierr = PetscFree(nnz);CHKERRQ(ierr); 5270 5271 /* set values in constraint matrix */ 5272 for (i=0;i<total_primal_vertices;i++) { 5273 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5274 } 5275 total_counts = total_primal_vertices; 5276 for (i=n_vertices;i<total_counts_cc;i++) { 5277 if (!PetscBTLookup(change_basis,i)) { 5278 PetscInt *cols; 5279 5280 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5281 cols = constraints_idxs+constraints_idxs_ptr[i]; 5282 for (k=0;k<constraints_n[i];k++) { 5283 PetscInt row = total_counts+k; 5284 PetscScalar *vals; 5285 5286 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5287 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5288 } 5289 total_counts += constraints_n[i]; 5290 } 5291 } 5292 /* assembling */ 5293 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5294 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5295 5296 /* 5297 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5298 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5299 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5300 */ 5301 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5302 if (pcbddc->use_change_of_basis) { 5303 /* dual and primal dofs on a single cc */ 5304 PetscInt dual_dofs,primal_dofs; 5305 /* working stuff for GEQRF */ 5306 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5307 PetscBLASInt lqr_work; 5308 /* working stuff for UNGQR */ 5309 PetscScalar *gqr_work,lgqr_work_t; 5310 PetscBLASInt lgqr_work; 5311 /* working stuff for TRTRS */ 5312 PetscScalar *trs_rhs; 5313 PetscBLASInt Blas_NRHS; 5314 /* pointers for values insertion into change of basis matrix */ 5315 PetscInt *start_rows,*start_cols; 5316 PetscScalar *start_vals; 5317 /* working stuff for values insertion */ 5318 PetscBT is_primal; 5319 PetscInt *aux_primal_numbering_B; 5320 /* matrix sizes */ 5321 PetscInt global_size,local_size; 5322 /* temporary change of basis */ 5323 Mat localChangeOfBasisMatrix; 5324 /* extra space for debugging */ 5325 PetscScalar *dbg_work; 5326 5327 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5328 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5329 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5330 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5331 /* nonzeros for local mat */ 5332 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5333 if (!pcbddc->benign_change || pcbddc->fake_change) { 5334 for (i=0;i<pcis->n;i++) nnz[i]=1; 5335 } else { 5336 const PetscInt *ii; 5337 PetscInt n; 5338 PetscBool flg_row; 5339 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5340 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5341 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5342 } 5343 for (i=n_vertices;i<total_counts_cc;i++) { 5344 if (PetscBTLookup(change_basis,i)) { 5345 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5346 if (PetscBTLookup(qr_needed_idx,i)) { 5347 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5348 } else { 5349 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5350 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5351 } 5352 } 5353 } 5354 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5355 ierr = PetscFree(nnz);CHKERRQ(ierr); 5356 /* Set interior change in the matrix */ 5357 if (!pcbddc->benign_change || pcbddc->fake_change) { 5358 for (i=0;i<pcis->n;i++) { 5359 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5360 } 5361 } else { 5362 const PetscInt *ii,*jj; 5363 PetscScalar *aa; 5364 PetscInt n; 5365 PetscBool flg_row; 5366 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5367 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5368 for (i=0;i<n;i++) { 5369 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5370 } 5371 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5372 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5373 } 5374 5375 if (pcbddc->dbg_flag) { 5376 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5377 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5378 } 5379 5380 5381 /* Now we loop on the constraints which need a change of basis */ 5382 /* 5383 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5384 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5385 5386 Basic blocks of change of basis matrix T computed by 5387 5388 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5389 5390 | 1 0 ... 0 s_1/S | 5391 | 0 1 ... 0 s_2/S | 5392 | ... | 5393 | 0 ... 1 s_{n-1}/S | 5394 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5395 5396 with S = \sum_{i=1}^n s_i^2 5397 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5398 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5399 5400 - QR decomposition of constraints otherwise 5401 */ 5402 if (qr_needed) { 5403 /* space to store Q */ 5404 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5405 /* array to store scaling factors for reflectors */ 5406 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5407 /* first we issue queries for optimal work */ 5408 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5409 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5410 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5411 lqr_work = -1; 5412 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5413 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5414 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5415 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5416 lgqr_work = -1; 5417 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5418 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5419 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5420 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5421 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5422 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5423 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5424 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5425 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5426 /* array to store rhs and solution of triangular solver */ 5427 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5428 /* allocating workspace for check */ 5429 if (pcbddc->dbg_flag) { 5430 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5431 } 5432 } 5433 /* array to store whether a node is primal or not */ 5434 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5435 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5436 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5437 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5438 for (i=0;i<total_primal_vertices;i++) { 5439 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5440 } 5441 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5442 5443 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5444 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5445 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5446 if (PetscBTLookup(change_basis,total_counts)) { 5447 /* get constraint info */ 5448 primal_dofs = constraints_n[total_counts]; 5449 dual_dofs = size_of_constraint-primal_dofs; 5450 5451 if (pcbddc->dbg_flag) { 5452 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5453 } 5454 5455 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5456 5457 /* copy quadrature constraints for change of basis check */ 5458 if (pcbddc->dbg_flag) { 5459 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5460 } 5461 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5462 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5463 5464 /* compute QR decomposition of constraints */ 5465 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5466 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5467 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5468 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5469 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5470 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5471 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5472 5473 /* explictly compute R^-T */ 5474 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5475 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5476 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5477 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5478 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5479 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5480 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5481 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5482 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5483 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5484 5485 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5487 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5488 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5489 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5490 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5491 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5492 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5493 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5494 5495 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5496 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5497 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5499 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5500 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5501 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5502 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5503 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5504 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5505 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5506 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5507 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5508 5509 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5510 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5511 /* insert cols for primal dofs */ 5512 for (j=0;j<primal_dofs;j++) { 5513 start_vals = &qr_basis[j*size_of_constraint]; 5514 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5515 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5516 } 5517 /* insert cols for dual dofs */ 5518 for (j=0,k=0;j<dual_dofs;k++) { 5519 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5520 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5521 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5522 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5523 j++; 5524 } 5525 } 5526 5527 /* check change of basis */ 5528 if (pcbddc->dbg_flag) { 5529 PetscInt ii,jj; 5530 PetscBool valid_qr=PETSC_TRUE; 5531 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5535 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5536 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5538 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5539 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5540 for (jj=0;jj<size_of_constraint;jj++) { 5541 for (ii=0;ii<primal_dofs;ii++) { 5542 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5543 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5544 } 5545 } 5546 if (!valid_qr) { 5547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5548 for (jj=0;jj<size_of_constraint;jj++) { 5549 for (ii=0;ii<primal_dofs;ii++) { 5550 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5551 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5552 } 5553 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5554 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5555 } 5556 } 5557 } 5558 } else { 5559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5560 } 5561 } 5562 } else { /* simple transformation block */ 5563 PetscInt row,col; 5564 PetscScalar val,norm; 5565 5566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5567 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5568 for (j=0;j<size_of_constraint;j++) { 5569 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5570 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5571 if (!PetscBTLookup(is_primal,row_B)) { 5572 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5573 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5574 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5575 } else { 5576 for (k=0;k<size_of_constraint;k++) { 5577 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5578 if (row != col) { 5579 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5580 } else { 5581 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5582 } 5583 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5584 } 5585 } 5586 } 5587 if (pcbddc->dbg_flag) { 5588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5589 } 5590 } 5591 } else { 5592 if (pcbddc->dbg_flag) { 5593 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5594 } 5595 } 5596 } 5597 5598 /* free workspace */ 5599 if (qr_needed) { 5600 if (pcbddc->dbg_flag) { 5601 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5602 } 5603 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5604 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5605 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5606 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5607 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5608 } 5609 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5610 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5611 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5612 5613 /* assembling of global change of variable */ 5614 if (!pcbddc->fake_change) { 5615 Mat tmat; 5616 PetscInt bs; 5617 5618 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5619 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5620 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5621 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5622 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5623 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5624 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5625 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5626 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5627 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5628 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5629 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5630 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5631 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5632 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5633 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5634 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5635 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5636 5637 /* check */ 5638 if (pcbddc->dbg_flag) { 5639 PetscReal error; 5640 Vec x,x_change; 5641 5642 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5643 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5644 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5645 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5646 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5647 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5648 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5649 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5650 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5651 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5652 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5653 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5654 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5655 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5656 ierr = VecDestroy(&x);CHKERRQ(ierr); 5657 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5658 } 5659 /* adapt sub_schurs computed (if any) */ 5660 if (pcbddc->use_deluxe_scaling) { 5661 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5662 5663 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5664 if (sub_schurs && sub_schurs->S_Ej_all) { 5665 Mat S_new,tmat; 5666 IS is_all_N,is_V_Sall = NULL; 5667 5668 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5669 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5670 if (pcbddc->deluxe_zerorows) { 5671 ISLocalToGlobalMapping NtoSall; 5672 IS is_V; 5673 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5674 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5675 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5676 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5677 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5678 } 5679 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5680 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5681 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5682 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5683 if (pcbddc->deluxe_zerorows) { 5684 const PetscScalar *array; 5685 const PetscInt *idxs_V,*idxs_all; 5686 PetscInt i,n_V; 5687 5688 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5689 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5690 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5691 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5692 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5693 for (i=0;i<n_V;i++) { 5694 PetscScalar val; 5695 PetscInt idx; 5696 5697 idx = idxs_V[i]; 5698 val = array[idxs_all[idxs_V[i]]]; 5699 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5700 } 5701 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5702 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5703 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5704 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5705 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5706 } 5707 sub_schurs->S_Ej_all = S_new; 5708 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5709 if (sub_schurs->sum_S_Ej_all) { 5710 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5711 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5712 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5713 if (pcbddc->deluxe_zerorows) { 5714 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5715 } 5716 sub_schurs->sum_S_Ej_all = S_new; 5717 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5718 } 5719 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5720 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5721 } 5722 /* destroy any change of basis context in sub_schurs */ 5723 if (sub_schurs && sub_schurs->change) { 5724 PetscInt i; 5725 5726 for (i=0;i<sub_schurs->n_subs;i++) { 5727 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5728 } 5729 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5730 } 5731 } 5732 if (pcbddc->switch_static) { /* need to save the local change */ 5733 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5734 } else { 5735 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5736 } 5737 /* determine if any process has changed the pressures locally */ 5738 pcbddc->change_interior = pcbddc->benign_have_null; 5739 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5740 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5741 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5742 pcbddc->use_qr_single = qr_needed; 5743 } 5744 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5745 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5746 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5747 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5748 } else { 5749 Mat benign_global = NULL; 5750 if (pcbddc->benign_have_null) { 5751 Mat tmat; 5752 5753 pcbddc->change_interior = PETSC_TRUE; 5754 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5755 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5756 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5757 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5758 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5759 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5760 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5761 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5762 if (pcbddc->benign_change) { 5763 Mat M; 5764 5765 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5766 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5767 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5768 ierr = MatDestroy(&M);CHKERRQ(ierr); 5769 } else { 5770 Mat eye; 5771 PetscScalar *array; 5772 5773 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5774 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5775 for (i=0;i<pcis->n;i++) { 5776 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5777 } 5778 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5779 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5780 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5781 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5782 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5783 } 5784 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5785 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5786 } 5787 if (pcbddc->user_ChangeOfBasisMatrix) { 5788 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5789 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5790 } else if (pcbddc->benign_have_null) { 5791 pcbddc->ChangeOfBasisMatrix = benign_global; 5792 } 5793 } 5794 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5795 IS is_global; 5796 const PetscInt *gidxs; 5797 5798 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5799 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5800 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5801 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5802 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5803 } 5804 } 5805 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5806 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5807 } 5808 5809 if (!pcbddc->fake_change) { 5810 /* add pressure dofs to set of primal nodes for numbering purposes */ 5811 for (i=0;i<pcbddc->benign_n;i++) { 5812 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5813 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5814 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5815 pcbddc->local_primal_size_cc++; 5816 pcbddc->local_primal_size++; 5817 } 5818 5819 /* check if a new primal space has been introduced (also take into account benign trick) */ 5820 pcbddc->new_primal_space_local = PETSC_TRUE; 5821 if (olocal_primal_size == pcbddc->local_primal_size) { 5822 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5823 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5824 if (!pcbddc->new_primal_space_local) { 5825 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5826 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5827 } 5828 } 5829 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5830 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5831 } 5832 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5833 5834 /* flush dbg viewer */ 5835 if (pcbddc->dbg_flag) { 5836 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5837 } 5838 5839 /* free workspace */ 5840 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5841 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5842 if (!pcbddc->adaptive_selection) { 5843 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5844 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5845 } else { 5846 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5847 pcbddc->adaptive_constraints_idxs_ptr, 5848 pcbddc->adaptive_constraints_data_ptr, 5849 pcbddc->adaptive_constraints_idxs, 5850 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5851 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5852 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5853 } 5854 PetscFunctionReturn(0); 5855 } 5856 5857 #undef __FUNCT__ 5858 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5859 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5860 { 5861 ISLocalToGlobalMapping map; 5862 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5863 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5864 PetscInt ierr,i,N; 5865 5866 PetscFunctionBegin; 5867 if (pcbddc->graphanalyzed) PetscFunctionReturn(0); 5868 /* Reset previously computed graph */ 5869 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5870 /* Init local Graph struct */ 5871 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5872 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5873 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5874 5875 /* Check validity of the csr graph passed in by the user */ 5876 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5877 5878 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5879 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5880 PetscInt *xadj,*adjncy; 5881 PetscInt nvtxs; 5882 PetscBool flg_row=PETSC_FALSE; 5883 5884 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5885 if (flg_row) { 5886 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5887 pcbddc->computed_rowadj = PETSC_TRUE; 5888 } 5889 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5890 } 5891 if (pcbddc->dbg_flag) { 5892 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5893 } 5894 5895 /* Setup of Graph */ 5896 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5897 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5898 5899 /* attach info on disconnected subdomains if present */ 5900 if (pcbddc->n_local_subs) { 5901 PetscInt *local_subs; 5902 5903 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5904 for (i=0;i<pcbddc->n_local_subs;i++) { 5905 const PetscInt *idxs; 5906 PetscInt nl,j; 5907 5908 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5909 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5910 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5911 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5912 } 5913 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5914 pcbddc->mat_graph->local_subs = local_subs; 5915 } 5916 5917 /* Graph's connected components analysis */ 5918 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5919 5920 /* set flag indicating analysis has been done */ 5921 pcbddc->graphanalyzed = PETSC_TRUE; 5922 PetscFunctionReturn(0); 5923 } 5924 5925 #undef __FUNCT__ 5926 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5927 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5928 { 5929 PetscInt i,j; 5930 PetscScalar *alphas; 5931 PetscErrorCode ierr; 5932 5933 PetscFunctionBegin; 5934 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5935 for (i=0;i<n;i++) { 5936 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5937 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5938 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5939 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5940 } 5941 ierr = PetscFree(alphas);CHKERRQ(ierr); 5942 PetscFunctionReturn(0); 5943 } 5944 5945 #undef __FUNCT__ 5946 #define __FUNCT__ "MatISGetSubassemblingPattern" 5947 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5948 { 5949 Mat A; 5950 PetscInt n_neighs,*neighs,*n_shared,**shared; 5951 PetscMPIInt size,rank,color; 5952 PetscInt *xadj,*adjncy; 5953 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5954 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5955 PetscInt void_procs,*procs_candidates = NULL; 5956 PetscInt xadj_count, *count; 5957 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5958 PetscSubcomm psubcomm; 5959 MPI_Comm subcomm; 5960 PetscErrorCode ierr; 5961 5962 PetscFunctionBegin; 5963 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5964 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5965 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5966 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5967 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5968 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5969 5970 if (have_void) *have_void = PETSC_FALSE; 5971 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5972 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5973 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5974 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5975 im_active = !!(n); 5976 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5977 void_procs = size - active_procs; 5978 /* get ranks of of non-active processes in mat communicator */ 5979 if (void_procs) { 5980 PetscInt ncand; 5981 5982 if (have_void) *have_void = PETSC_TRUE; 5983 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5984 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5985 for (i=0,ncand=0;i<size;i++) { 5986 if (!procs_candidates[i]) { 5987 procs_candidates[ncand++] = i; 5988 } 5989 } 5990 /* force n_subdomains to be not greater that the number of non-active processes */ 5991 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5992 } 5993 5994 /* number of subdomains requested greater than active processes -> just shift the matrix 5995 number of subdomains requested 1 -> send to master or first candidate in voids */ 5996 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5997 PetscInt issize,isidx,dest; 5998 if (*n_subdomains == 1) dest = 0; 5999 else dest = rank; 6000 if (im_active) { 6001 issize = 1; 6002 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6003 isidx = procs_candidates[dest]; 6004 } else { 6005 isidx = dest; 6006 } 6007 } else { 6008 issize = 0; 6009 isidx = -1; 6010 } 6011 *n_subdomains = active_procs; 6012 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6013 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6014 PetscFunctionReturn(0); 6015 } 6016 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6017 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6018 threshold = PetscMax(threshold,2); 6019 6020 /* Get info on mapping */ 6021 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6022 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6023 6024 /* build local CSR graph of subdomains' connectivity */ 6025 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6026 xadj[0] = 0; 6027 xadj[1] = PetscMax(n_neighs-1,0); 6028 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6029 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6030 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6031 for (i=1;i<n_neighs;i++) 6032 for (j=0;j<n_shared[i];j++) 6033 count[shared[i][j]] += 1; 6034 6035 xadj_count = 0; 6036 for (i=1;i<n_neighs;i++) { 6037 for (j=0;j<n_shared[i];j++) { 6038 if (count[shared[i][j]] < threshold) { 6039 adjncy[xadj_count] = neighs[i]; 6040 adjncy_wgt[xadj_count] = n_shared[i]; 6041 xadj_count++; 6042 break; 6043 } 6044 } 6045 } 6046 xadj[1] = xadj_count; 6047 ierr = PetscFree(count);CHKERRQ(ierr); 6048 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6049 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6050 6051 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6052 6053 /* Restrict work on active processes only */ 6054 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6055 if (void_procs) { 6056 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6057 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6058 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6059 subcomm = PetscSubcommChild(psubcomm); 6060 } else { 6061 psubcomm = NULL; 6062 subcomm = PetscObjectComm((PetscObject)mat); 6063 } 6064 6065 v_wgt = NULL; 6066 if (!color) { 6067 ierr = PetscFree(xadj);CHKERRQ(ierr); 6068 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6069 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6070 } else { 6071 Mat subdomain_adj; 6072 IS new_ranks,new_ranks_contig; 6073 MatPartitioning partitioner; 6074 PetscInt rstart=0,rend=0; 6075 PetscInt *is_indices,*oldranks; 6076 PetscMPIInt size; 6077 PetscBool aggregate; 6078 6079 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6080 if (void_procs) { 6081 PetscInt prank = rank; 6082 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6083 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6084 for (i=0;i<xadj[1];i++) { 6085 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6086 } 6087 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6088 } else { 6089 oldranks = NULL; 6090 } 6091 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6092 if (aggregate) { /* TODO: all this part could be made more efficient */ 6093 PetscInt lrows,row,ncols,*cols; 6094 PetscMPIInt nrank; 6095 PetscScalar *vals; 6096 6097 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6098 lrows = 0; 6099 if (nrank<redprocs) { 6100 lrows = size/redprocs; 6101 if (nrank<size%redprocs) lrows++; 6102 } 6103 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6104 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6105 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6106 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6107 row = nrank; 6108 ncols = xadj[1]-xadj[0]; 6109 cols = adjncy; 6110 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6111 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6112 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6113 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6114 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6115 ierr = PetscFree(xadj);CHKERRQ(ierr); 6116 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6117 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6118 ierr = PetscFree(vals);CHKERRQ(ierr); 6119 if (use_vwgt) { 6120 Vec v; 6121 const PetscScalar *array; 6122 PetscInt nl; 6123 6124 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6125 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6126 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6127 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6128 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6129 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6130 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6131 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6132 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6133 ierr = VecDestroy(&v);CHKERRQ(ierr); 6134 } 6135 } else { 6136 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6137 if (use_vwgt) { 6138 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6139 v_wgt[0] = local_size; 6140 } 6141 } 6142 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6143 6144 /* Partition */ 6145 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6146 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6147 if (v_wgt) { 6148 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6149 } 6150 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6151 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6152 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6153 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6154 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6155 6156 /* renumber new_ranks to avoid "holes" in new set of processors */ 6157 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6158 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6159 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6160 if (!aggregate) { 6161 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6162 #if defined(PETSC_USE_DEBUG) 6163 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6164 #endif 6165 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6166 } else if (oldranks) { 6167 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6168 } else { 6169 ranks_send_to_idx[0] = is_indices[0]; 6170 } 6171 } else { 6172 PetscInt idxs[1]; 6173 PetscMPIInt tag; 6174 MPI_Request *reqs; 6175 6176 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6177 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6178 for (i=rstart;i<rend;i++) { 6179 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6180 } 6181 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6182 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6183 ierr = PetscFree(reqs);CHKERRQ(ierr); 6184 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6185 #if defined(PETSC_USE_DEBUG) 6186 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6187 #endif 6188 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6189 } else if (oldranks) { 6190 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6191 } else { 6192 ranks_send_to_idx[0] = idxs[0]; 6193 } 6194 } 6195 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6196 /* clean up */ 6197 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6198 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6199 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6200 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6201 } 6202 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6203 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6204 6205 /* assemble parallel IS for sends */ 6206 i = 1; 6207 if (!color) i=0; 6208 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6209 PetscFunctionReturn(0); 6210 } 6211 6212 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6213 6214 #undef __FUNCT__ 6215 #define __FUNCT__ "MatISSubassemble" 6216 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6217 { 6218 Mat local_mat; 6219 IS is_sends_internal; 6220 PetscInt rows,cols,new_local_rows; 6221 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6222 PetscBool ismatis,isdense,newisdense,destroy_mat; 6223 ISLocalToGlobalMapping l2gmap; 6224 PetscInt* l2gmap_indices; 6225 const PetscInt* is_indices; 6226 MatType new_local_type; 6227 /* buffers */ 6228 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6229 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6230 PetscInt *recv_buffer_idxs_local; 6231 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6232 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6233 /* MPI */ 6234 MPI_Comm comm,comm_n; 6235 PetscSubcomm subcomm; 6236 PetscMPIInt n_sends,n_recvs,commsize; 6237 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6238 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6239 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6240 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6241 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6242 PetscErrorCode ierr; 6243 6244 PetscFunctionBegin; 6245 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6246 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6247 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6248 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6249 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6250 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6251 PetscValidLogicalCollectiveBool(mat,reuse,6); 6252 PetscValidLogicalCollectiveInt(mat,nis,8); 6253 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6254 if (nvecs) { 6255 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6256 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6257 } 6258 /* further checks */ 6259 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6260 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6261 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6262 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6263 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6264 if (reuse && *mat_n) { 6265 PetscInt mrows,mcols,mnrows,mncols; 6266 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6267 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6268 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6269 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6270 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6271 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6272 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6273 } 6274 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6275 PetscValidLogicalCollectiveInt(mat,bs,0); 6276 6277 /* prepare IS for sending if not provided */ 6278 if (!is_sends) { 6279 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6280 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6281 } else { 6282 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6283 is_sends_internal = is_sends; 6284 } 6285 6286 /* get comm */ 6287 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6288 6289 /* compute number of sends */ 6290 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6291 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6292 6293 /* compute number of receives */ 6294 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6295 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6296 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6297 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6298 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6299 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6300 ierr = PetscFree(iflags);CHKERRQ(ierr); 6301 6302 /* restrict comm if requested */ 6303 subcomm = 0; 6304 destroy_mat = PETSC_FALSE; 6305 if (restrict_comm) { 6306 PetscMPIInt color,subcommsize; 6307 6308 color = 0; 6309 if (restrict_full) { 6310 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6311 } else { 6312 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6313 } 6314 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6315 subcommsize = commsize - subcommsize; 6316 /* check if reuse has been requested */ 6317 if (reuse) { 6318 if (*mat_n) { 6319 PetscMPIInt subcommsize2; 6320 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6321 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6322 comm_n = PetscObjectComm((PetscObject)*mat_n); 6323 } else { 6324 comm_n = PETSC_COMM_SELF; 6325 } 6326 } else { /* MAT_INITIAL_MATRIX */ 6327 PetscMPIInt rank; 6328 6329 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6330 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6331 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6332 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6333 comm_n = PetscSubcommChild(subcomm); 6334 } 6335 /* flag to destroy *mat_n if not significative */ 6336 if (color) destroy_mat = PETSC_TRUE; 6337 } else { 6338 comm_n = comm; 6339 } 6340 6341 /* prepare send/receive buffers */ 6342 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6343 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6344 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6345 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6346 if (nis) { 6347 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6348 } 6349 6350 /* Get data from local matrices */ 6351 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6352 /* TODO: See below some guidelines on how to prepare the local buffers */ 6353 /* 6354 send_buffer_vals should contain the raw values of the local matrix 6355 send_buffer_idxs should contain: 6356 - MatType_PRIVATE type 6357 - PetscInt size_of_l2gmap 6358 - PetscInt global_row_indices[size_of_l2gmap] 6359 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6360 */ 6361 else { 6362 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6363 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6364 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6365 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6366 send_buffer_idxs[1] = i; 6367 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6368 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6369 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6370 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6371 for (i=0;i<n_sends;i++) { 6372 ilengths_vals[is_indices[i]] = len*len; 6373 ilengths_idxs[is_indices[i]] = len+2; 6374 } 6375 } 6376 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6377 /* additional is (if any) */ 6378 if (nis) { 6379 PetscMPIInt psum; 6380 PetscInt j; 6381 for (j=0,psum=0;j<nis;j++) { 6382 PetscInt plen; 6383 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6384 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6385 psum += len+1; /* indices + lenght */ 6386 } 6387 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6388 for (j=0,psum=0;j<nis;j++) { 6389 PetscInt plen; 6390 const PetscInt *is_array_idxs; 6391 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6392 send_buffer_idxs_is[psum] = plen; 6393 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6394 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6395 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6396 psum += plen+1; /* indices + lenght */ 6397 } 6398 for (i=0;i<n_sends;i++) { 6399 ilengths_idxs_is[is_indices[i]] = psum; 6400 } 6401 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6402 } 6403 6404 buf_size_idxs = 0; 6405 buf_size_vals = 0; 6406 buf_size_idxs_is = 0; 6407 buf_size_vecs = 0; 6408 for (i=0;i<n_recvs;i++) { 6409 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6410 buf_size_vals += (PetscInt)olengths_vals[i]; 6411 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6412 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6413 } 6414 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6415 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6416 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6417 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6418 6419 /* get new tags for clean communications */ 6420 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6421 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6422 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6423 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6424 6425 /* allocate for requests */ 6426 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6427 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6428 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6429 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6430 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6431 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6432 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6433 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6434 6435 /* communications */ 6436 ptr_idxs = recv_buffer_idxs; 6437 ptr_vals = recv_buffer_vals; 6438 ptr_idxs_is = recv_buffer_idxs_is; 6439 ptr_vecs = recv_buffer_vecs; 6440 for (i=0;i<n_recvs;i++) { 6441 source_dest = onodes[i]; 6442 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6443 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6444 ptr_idxs += olengths_idxs[i]; 6445 ptr_vals += olengths_vals[i]; 6446 if (nis) { 6447 source_dest = onodes_is[i]; 6448 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6449 ptr_idxs_is += olengths_idxs_is[i]; 6450 } 6451 if (nvecs) { 6452 source_dest = onodes[i]; 6453 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6454 ptr_vecs += olengths_idxs[i]-2; 6455 } 6456 } 6457 for (i=0;i<n_sends;i++) { 6458 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6459 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6460 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6461 if (nis) { 6462 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6463 } 6464 if (nvecs) { 6465 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6466 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6467 } 6468 } 6469 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6470 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6471 6472 /* assemble new l2g map */ 6473 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6474 ptr_idxs = recv_buffer_idxs; 6475 new_local_rows = 0; 6476 for (i=0;i<n_recvs;i++) { 6477 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6478 ptr_idxs += olengths_idxs[i]; 6479 } 6480 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6481 ptr_idxs = recv_buffer_idxs; 6482 new_local_rows = 0; 6483 for (i=0;i<n_recvs;i++) { 6484 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6485 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6486 ptr_idxs += olengths_idxs[i]; 6487 } 6488 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6489 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6490 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6491 6492 /* infer new local matrix type from received local matrices type */ 6493 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6494 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6495 if (n_recvs) { 6496 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6497 ptr_idxs = recv_buffer_idxs; 6498 for (i=0;i<n_recvs;i++) { 6499 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6500 new_local_type_private = MATAIJ_PRIVATE; 6501 break; 6502 } 6503 ptr_idxs += olengths_idxs[i]; 6504 } 6505 switch (new_local_type_private) { 6506 case MATDENSE_PRIVATE: 6507 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6508 new_local_type = MATSEQAIJ; 6509 bs = 1; 6510 } else { /* if I receive only 1 dense matrix */ 6511 new_local_type = MATSEQDENSE; 6512 bs = 1; 6513 } 6514 break; 6515 case MATAIJ_PRIVATE: 6516 new_local_type = MATSEQAIJ; 6517 bs = 1; 6518 break; 6519 case MATBAIJ_PRIVATE: 6520 new_local_type = MATSEQBAIJ; 6521 break; 6522 case MATSBAIJ_PRIVATE: 6523 new_local_type = MATSEQSBAIJ; 6524 break; 6525 default: 6526 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6527 break; 6528 } 6529 } else { /* by default, new_local_type is seqdense */ 6530 new_local_type = MATSEQDENSE; 6531 bs = 1; 6532 } 6533 6534 /* create MATIS object if needed */ 6535 if (!reuse) { 6536 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6537 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6538 } else { 6539 /* it also destroys the local matrices */ 6540 if (*mat_n) { 6541 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6542 } else { /* this is a fake object */ 6543 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6544 } 6545 } 6546 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6547 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6548 6549 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6550 6551 /* Global to local map of received indices */ 6552 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6553 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6554 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6555 6556 /* restore attributes -> type of incoming data and its size */ 6557 buf_size_idxs = 0; 6558 for (i=0;i<n_recvs;i++) { 6559 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6560 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6561 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6562 } 6563 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6564 6565 /* set preallocation */ 6566 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6567 if (!newisdense) { 6568 PetscInt *new_local_nnz=0; 6569 6570 ptr_idxs = recv_buffer_idxs_local; 6571 if (n_recvs) { 6572 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6573 } 6574 for (i=0;i<n_recvs;i++) { 6575 PetscInt j; 6576 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6577 for (j=0;j<*(ptr_idxs+1);j++) { 6578 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6579 } 6580 } else { 6581 /* TODO */ 6582 } 6583 ptr_idxs += olengths_idxs[i]; 6584 } 6585 if (new_local_nnz) { 6586 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6587 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6588 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6589 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6590 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6591 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6592 } else { 6593 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6594 } 6595 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6596 } else { 6597 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6598 } 6599 6600 /* set values */ 6601 ptr_vals = recv_buffer_vals; 6602 ptr_idxs = recv_buffer_idxs_local; 6603 for (i=0;i<n_recvs;i++) { 6604 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6605 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6606 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6607 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6608 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6609 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6610 } else { 6611 /* TODO */ 6612 } 6613 ptr_idxs += olengths_idxs[i]; 6614 ptr_vals += olengths_vals[i]; 6615 } 6616 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6617 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6618 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6619 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6620 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6621 6622 #if 0 6623 if (!restrict_comm) { /* check */ 6624 Vec lvec,rvec; 6625 PetscReal infty_error; 6626 6627 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6628 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6629 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6630 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6631 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6632 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6633 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6634 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6635 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6636 } 6637 #endif 6638 6639 /* assemble new additional is (if any) */ 6640 if (nis) { 6641 PetscInt **temp_idxs,*count_is,j,psum; 6642 6643 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6644 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6645 ptr_idxs = recv_buffer_idxs_is; 6646 psum = 0; 6647 for (i=0;i<n_recvs;i++) { 6648 for (j=0;j<nis;j++) { 6649 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6650 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6651 psum += plen; 6652 ptr_idxs += plen+1; /* shift pointer to received data */ 6653 } 6654 } 6655 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6656 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6657 for (i=1;i<nis;i++) { 6658 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6659 } 6660 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6661 ptr_idxs = recv_buffer_idxs_is; 6662 for (i=0;i<n_recvs;i++) { 6663 for (j=0;j<nis;j++) { 6664 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6665 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6666 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6667 ptr_idxs += plen+1; /* shift pointer to received data */ 6668 } 6669 } 6670 for (i=0;i<nis;i++) { 6671 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6672 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6673 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6674 } 6675 ierr = PetscFree(count_is);CHKERRQ(ierr); 6676 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6677 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6678 } 6679 /* free workspace */ 6680 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6681 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6682 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6683 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6684 if (isdense) { 6685 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6686 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6687 } else { 6688 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6689 } 6690 if (nis) { 6691 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6692 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6693 } 6694 6695 if (nvecs) { 6696 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6697 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6698 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6699 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6700 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6701 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6702 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6703 /* set values */ 6704 ptr_vals = recv_buffer_vecs; 6705 ptr_idxs = recv_buffer_idxs_local; 6706 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6707 for (i=0;i<n_recvs;i++) { 6708 PetscInt j; 6709 for (j=0;j<*(ptr_idxs+1);j++) { 6710 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6711 } 6712 ptr_idxs += olengths_idxs[i]; 6713 ptr_vals += olengths_idxs[i]-2; 6714 } 6715 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6716 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6717 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6718 } 6719 6720 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6721 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6722 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6723 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6724 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6725 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6726 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6727 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6728 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6729 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6730 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6731 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6732 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6733 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6734 ierr = PetscFree(onodes);CHKERRQ(ierr); 6735 if (nis) { 6736 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6737 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6738 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6739 } 6740 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6741 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6742 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6743 for (i=0;i<nis;i++) { 6744 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6745 } 6746 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6747 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6748 } 6749 *mat_n = NULL; 6750 } 6751 PetscFunctionReturn(0); 6752 } 6753 6754 /* temporary hack into ksp private data structure */ 6755 #include <petsc/private/kspimpl.h> 6756 6757 #undef __FUNCT__ 6758 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6759 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6760 { 6761 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6762 PC_IS *pcis = (PC_IS*)pc->data; 6763 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6764 Mat coarsedivudotp = NULL; 6765 MatNullSpace CoarseNullSpace = NULL; 6766 ISLocalToGlobalMapping coarse_islg; 6767 IS coarse_is,*isarray; 6768 PetscInt i,im_active=-1,active_procs=-1; 6769 PetscInt nis,nisdofs,nisneu,nisvert; 6770 PC pc_temp; 6771 PCType coarse_pc_type; 6772 KSPType coarse_ksp_type; 6773 PetscBool multilevel_requested,multilevel_allowed; 6774 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6775 Mat t_coarse_mat_is; 6776 PetscInt ncoarse; 6777 PetscBool compute_vecs = PETSC_FALSE; 6778 PetscScalar *array; 6779 MatReuse coarse_mat_reuse; 6780 PetscBool restr, full_restr, have_void; 6781 PetscErrorCode ierr; 6782 6783 PetscFunctionBegin; 6784 /* Assign global numbering to coarse dofs */ 6785 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6786 PetscInt ocoarse_size; 6787 compute_vecs = PETSC_TRUE; 6788 ocoarse_size = pcbddc->coarse_size; 6789 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6790 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6791 /* see if we can avoid some work */ 6792 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6793 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6794 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6795 PC pc; 6796 PetscBool isbddc; 6797 6798 /* temporary workaround since PCBDDC does not have a reset method so far */ 6799 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6800 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6801 if (isbddc) { 6802 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6803 } else { 6804 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6805 } 6806 coarse_reuse = PETSC_FALSE; 6807 } else { /* we can safely reuse already computed coarse matrix */ 6808 coarse_reuse = PETSC_TRUE; 6809 } 6810 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6811 coarse_reuse = PETSC_FALSE; 6812 } 6813 /* reset any subassembling information */ 6814 if (!coarse_reuse || pcbddc->recompute_topography) { 6815 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6816 } 6817 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6818 coarse_reuse = PETSC_TRUE; 6819 } 6820 /* assemble coarse matrix */ 6821 if (coarse_reuse && pcbddc->coarse_ksp) { 6822 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6823 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6824 coarse_mat_reuse = MAT_REUSE_MATRIX; 6825 } else { 6826 coarse_mat = NULL; 6827 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6828 } 6829 6830 /* creates temporary l2gmap and IS for coarse indexes */ 6831 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6832 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6833 6834 /* creates temporary MATIS object for coarse matrix */ 6835 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6836 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6837 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6838 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6839 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6840 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6841 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6842 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6843 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6844 6845 /* count "active" (i.e. with positive local size) and "void" processes */ 6846 im_active = !!(pcis->n); 6847 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6848 6849 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6850 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6851 /* full_restr : just use the receivers from the subassembling pattern */ 6852 coarse_mat_is = NULL; 6853 multilevel_allowed = PETSC_FALSE; 6854 multilevel_requested = PETSC_FALSE; 6855 full_restr = PETSC_TRUE; 6856 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6857 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6858 if (multilevel_requested) { 6859 ncoarse = active_procs/pcbddc->coarsening_ratio; 6860 restr = PETSC_FALSE; 6861 full_restr = PETSC_FALSE; 6862 } else { 6863 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6864 restr = PETSC_TRUE; 6865 full_restr = PETSC_TRUE; 6866 } 6867 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6868 ncoarse = PetscMax(1,ncoarse); 6869 if (!pcbddc->coarse_subassembling) { 6870 if (pcbddc->coarsening_ratio > 1) { 6871 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6872 } else { 6873 PetscMPIInt size,rank; 6874 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6875 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6876 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6877 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6878 } 6879 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6880 PetscInt psum; 6881 PetscMPIInt size; 6882 if (pcbddc->coarse_ksp) psum = 1; 6883 else psum = 0; 6884 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6885 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6886 if (ncoarse < size) have_void = PETSC_TRUE; 6887 } 6888 /* determine if we can go multilevel */ 6889 if (multilevel_requested) { 6890 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6891 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6892 } 6893 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6894 6895 /* dump subassembling pattern */ 6896 if (pcbddc->dbg_flag && multilevel_allowed) { 6897 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6898 } 6899 6900 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6901 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6902 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6903 const PetscInt *idxs; 6904 ISLocalToGlobalMapping tmap; 6905 6906 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6907 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6908 /* allocate space for temporary storage */ 6909 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6910 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6911 /* allocate for IS array */ 6912 nisdofs = pcbddc->n_ISForDofsLocal; 6913 nisneu = !!pcbddc->NeumannBoundariesLocal; 6914 nisvert = 0; /* nisvert is not used */ 6915 nis = nisdofs + nisneu + nisvert; 6916 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6917 /* dofs splitting */ 6918 for (i=0;i<nisdofs;i++) { 6919 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6920 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6921 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6922 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6923 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6924 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6925 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6926 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6927 } 6928 /* neumann boundaries */ 6929 if (pcbddc->NeumannBoundariesLocal) { 6930 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6931 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6932 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6933 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6934 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6935 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6936 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6937 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6938 } 6939 /* free memory */ 6940 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6941 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6942 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6943 } else { 6944 nis = 0; 6945 nisdofs = 0; 6946 nisneu = 0; 6947 nisvert = 0; 6948 isarray = NULL; 6949 } 6950 /* destroy no longer needed map */ 6951 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6952 6953 /* subassemble */ 6954 if (multilevel_allowed) { 6955 Vec vp[1]; 6956 PetscInt nvecs = 0; 6957 PetscBool reuse,reuser; 6958 6959 if (coarse_mat) reuse = PETSC_TRUE; 6960 else reuse = PETSC_FALSE; 6961 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6962 vp[0] = NULL; 6963 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6964 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6965 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6966 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6967 nvecs = 1; 6968 6969 if (pcbddc->divudotp) { 6970 Mat B,loc_divudotp; 6971 Vec v,p; 6972 IS dummy; 6973 PetscInt np; 6974 6975 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6976 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6977 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6978 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6979 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6980 ierr = VecSet(p,1.);CHKERRQ(ierr); 6981 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6982 ierr = VecDestroy(&p);CHKERRQ(ierr); 6983 ierr = MatDestroy(&B);CHKERRQ(ierr); 6984 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6985 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6986 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6987 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6988 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6989 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6990 ierr = VecDestroy(&v);CHKERRQ(ierr); 6991 } 6992 } 6993 if (reuser) { 6994 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6995 } else { 6996 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6997 } 6998 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6999 PetscScalar *arraym,*arrayv; 7000 PetscInt nl; 7001 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7002 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7003 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7004 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7005 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7006 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7007 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7008 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7009 } else { 7010 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7011 } 7012 } else { 7013 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 7014 } 7015 if (coarse_mat_is || coarse_mat) { 7016 PetscMPIInt size; 7017 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 7018 if (!multilevel_allowed) { 7019 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7020 } else { 7021 Mat A; 7022 7023 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7024 if (coarse_mat_is) { 7025 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7026 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7027 coarse_mat = coarse_mat_is; 7028 } 7029 /* be sure we don't have MatSeqDENSE as local mat */ 7030 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7031 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7032 } 7033 } 7034 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7035 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7036 7037 /* create local to global scatters for coarse problem */ 7038 if (compute_vecs) { 7039 PetscInt lrows; 7040 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7041 if (coarse_mat) { 7042 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7043 } else { 7044 lrows = 0; 7045 } 7046 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7047 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7048 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7049 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7050 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7051 } 7052 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7053 7054 /* set defaults for coarse KSP and PC */ 7055 if (multilevel_allowed) { 7056 coarse_ksp_type = KSPRICHARDSON; 7057 coarse_pc_type = PCBDDC; 7058 } else { 7059 coarse_ksp_type = KSPPREONLY; 7060 coarse_pc_type = PCREDUNDANT; 7061 } 7062 7063 /* print some info if requested */ 7064 if (pcbddc->dbg_flag) { 7065 if (!multilevel_allowed) { 7066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7067 if (multilevel_requested) { 7068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7069 } else if (pcbddc->max_levels) { 7070 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7071 } 7072 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7073 } 7074 } 7075 7076 /* create the coarse KSP object only once with defaults */ 7077 if (coarse_mat) { 7078 PetscViewer dbg_viewer = NULL; 7079 if (pcbddc->dbg_flag) { 7080 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7081 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7082 } 7083 if (!pcbddc->coarse_ksp) { 7084 char prefix[256],str_level[16]; 7085 size_t len; 7086 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7087 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7088 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7089 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7090 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7091 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7092 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7093 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7094 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7095 /* prefix */ 7096 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7097 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7098 if (!pcbddc->current_level) { 7099 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7100 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7101 } else { 7102 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7103 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7104 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7105 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7106 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7107 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7108 } 7109 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7110 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7111 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7112 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7113 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7114 /* allow user customization */ 7115 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7116 } 7117 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7118 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7119 if (nisdofs) { 7120 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7121 for (i=0;i<nisdofs;i++) { 7122 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7123 } 7124 } 7125 if (nisneu) { 7126 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7127 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7128 } 7129 if (nisvert) { 7130 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7131 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7132 } 7133 7134 /* get some info after set from options */ 7135 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7136 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7137 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7138 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7139 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7140 isbddc = PETSC_FALSE; 7141 } 7142 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7143 if (isredundant) { 7144 KSP inner_ksp; 7145 PC inner_pc; 7146 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7147 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7148 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7149 } 7150 7151 /* parameters which miss an API */ 7152 if (isbddc) { 7153 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7154 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7155 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7156 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7157 if (pcbddc_coarse->benign_saddle_point) { 7158 Mat coarsedivudotp_is; 7159 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7160 IS row,col; 7161 const PetscInt *gidxs; 7162 PetscInt n,st,M,N; 7163 7164 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7165 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7166 st = st-n; 7167 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7168 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7169 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7170 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7171 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7172 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7173 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7174 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7175 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7176 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7177 ierr = ISDestroy(&row);CHKERRQ(ierr); 7178 ierr = ISDestroy(&col);CHKERRQ(ierr); 7179 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7180 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7181 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7182 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7183 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7184 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7185 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7186 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7187 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7188 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7189 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7190 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7191 } 7192 } 7193 7194 /* propagate symmetry info of coarse matrix */ 7195 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7196 if (pc->pmat->symmetric_set) { 7197 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7198 } 7199 if (pc->pmat->hermitian_set) { 7200 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7201 } 7202 if (pc->pmat->spd_set) { 7203 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7204 } 7205 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7206 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7207 } 7208 /* set operators */ 7209 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7210 if (pcbddc->dbg_flag) { 7211 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7212 } 7213 } 7214 ierr = PetscFree(isarray);CHKERRQ(ierr); 7215 #if 0 7216 { 7217 PetscViewer viewer; 7218 char filename[256]; 7219 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7220 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7221 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7222 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7223 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7224 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7225 } 7226 #endif 7227 7228 if (pcbddc->coarse_ksp) { 7229 Vec crhs,csol; 7230 7231 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7232 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7233 if (!csol) { 7234 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7235 } 7236 if (!crhs) { 7237 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7238 } 7239 } 7240 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7241 7242 /* compute null space for coarse solver if the benign trick has been requested */ 7243 if (pcbddc->benign_null) { 7244 7245 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7246 for (i=0;i<pcbddc->benign_n;i++) { 7247 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7248 } 7249 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7250 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7251 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7252 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7253 if (coarse_mat) { 7254 Vec nullv; 7255 PetscScalar *array,*array2; 7256 PetscInt nl; 7257 7258 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7259 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7260 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7261 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7262 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7263 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7264 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7265 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7266 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7267 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7268 } 7269 } 7270 7271 if (pcbddc->coarse_ksp) { 7272 PetscBool ispreonly; 7273 7274 if (CoarseNullSpace) { 7275 PetscBool isnull; 7276 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7277 if (isnull) { 7278 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7279 } 7280 /* TODO: add local nullspaces (if any) */ 7281 } 7282 /* setup coarse ksp */ 7283 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7284 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7285 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7286 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7287 KSP check_ksp; 7288 KSPType check_ksp_type; 7289 PC check_pc; 7290 Vec check_vec,coarse_vec; 7291 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7292 PetscInt its; 7293 PetscBool compute_eigs; 7294 PetscReal *eigs_r,*eigs_c; 7295 PetscInt neigs; 7296 const char *prefix; 7297 7298 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7299 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7300 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7301 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7302 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7303 /* prevent from setup unneeded object */ 7304 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7305 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7306 if (ispreonly) { 7307 check_ksp_type = KSPPREONLY; 7308 compute_eigs = PETSC_FALSE; 7309 } else { 7310 check_ksp_type = KSPGMRES; 7311 compute_eigs = PETSC_TRUE; 7312 } 7313 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7314 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7315 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7316 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7317 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7318 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7319 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7320 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7321 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7322 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7323 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7324 /* create random vec */ 7325 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7326 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7327 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7328 /* solve coarse problem */ 7329 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7330 /* set eigenvalue estimation if preonly has not been requested */ 7331 if (compute_eigs) { 7332 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7333 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7334 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7335 if (neigs) { 7336 lambda_max = eigs_r[neigs-1]; 7337 lambda_min = eigs_r[0]; 7338 if (pcbddc->use_coarse_estimates) { 7339 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7340 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7341 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7342 } 7343 } 7344 } 7345 } 7346 7347 /* check coarse problem residual error */ 7348 if (pcbddc->dbg_flag) { 7349 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7350 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7351 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7352 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7353 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7354 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7355 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7356 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7357 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7358 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7359 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7360 if (CoarseNullSpace) { 7361 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7362 } 7363 if (compute_eigs) { 7364 PetscReal lambda_max_s,lambda_min_s; 7365 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7366 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7367 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7368 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7369 for (i=0;i<neigs;i++) { 7370 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7371 } 7372 } 7373 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7374 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7375 } 7376 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7377 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7378 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7379 if (compute_eigs) { 7380 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7381 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7382 } 7383 } 7384 } 7385 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7386 /* print additional info */ 7387 if (pcbddc->dbg_flag) { 7388 /* waits until all processes reaches this point */ 7389 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7390 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7391 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7392 } 7393 7394 /* free memory */ 7395 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7396 PetscFunctionReturn(0); 7397 } 7398 7399 #undef __FUNCT__ 7400 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7401 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7402 { 7403 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7404 PC_IS* pcis = (PC_IS*)pc->data; 7405 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7406 IS subset,subset_mult,subset_n; 7407 PetscInt local_size,coarse_size=0; 7408 PetscInt *local_primal_indices=NULL; 7409 const PetscInt *t_local_primal_indices; 7410 PetscErrorCode ierr; 7411 7412 PetscFunctionBegin; 7413 /* Compute global number of coarse dofs */ 7414 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7415 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7416 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7417 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7418 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7419 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7420 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7421 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7422 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7423 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7424 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7425 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7426 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7427 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7428 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7429 7430 /* check numbering */ 7431 if (pcbddc->dbg_flag) { 7432 PetscScalar coarsesum,*array,*array2; 7433 PetscInt i; 7434 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7435 7436 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7437 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7438 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7439 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7440 /* counter */ 7441 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7442 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7443 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7444 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7445 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7446 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7447 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7448 for (i=0;i<pcbddc->local_primal_size;i++) { 7449 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7450 } 7451 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7452 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7453 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7454 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7455 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7456 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7457 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7458 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7459 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7460 for (i=0;i<pcis->n;i++) { 7461 if (array[i] != 0.0 && array[i] != array2[i]) { 7462 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7463 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7464 set_error = PETSC_TRUE; 7465 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7467 } 7468 } 7469 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7470 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7471 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7472 for (i=0;i<pcis->n;i++) { 7473 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7474 } 7475 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7476 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7477 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7478 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7479 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7480 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7481 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7482 PetscInt *gidxs; 7483 7484 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7485 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7486 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7487 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7488 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7489 for (i=0;i<pcbddc->local_primal_size;i++) { 7490 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7491 } 7492 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7493 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7494 } 7495 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7496 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7497 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7498 } 7499 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7500 /* get back data */ 7501 *coarse_size_n = coarse_size; 7502 *local_primal_indices_n = local_primal_indices; 7503 PetscFunctionReturn(0); 7504 } 7505 7506 #undef __FUNCT__ 7507 #define __FUNCT__ "PCBDDCGlobalToLocal" 7508 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7509 { 7510 IS localis_t; 7511 PetscInt i,lsize,*idxs,n; 7512 PetscScalar *vals; 7513 PetscErrorCode ierr; 7514 7515 PetscFunctionBegin; 7516 /* get indices in local ordering exploiting local to global map */ 7517 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7518 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7519 for (i=0;i<lsize;i++) vals[i] = 1.0; 7520 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7521 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7522 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7523 if (idxs) { /* multilevel guard */ 7524 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7525 } 7526 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7527 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7528 ierr = PetscFree(vals);CHKERRQ(ierr); 7529 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7530 /* now compute set in local ordering */ 7531 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7532 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7533 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7534 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7535 for (i=0,lsize=0;i<n;i++) { 7536 if (PetscRealPart(vals[i]) > 0.5) { 7537 lsize++; 7538 } 7539 } 7540 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7541 for (i=0,lsize=0;i<n;i++) { 7542 if (PetscRealPart(vals[i]) > 0.5) { 7543 idxs[lsize++] = i; 7544 } 7545 } 7546 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7547 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7548 *localis = localis_t; 7549 PetscFunctionReturn(0); 7550 } 7551 7552 #undef __FUNCT__ 7553 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7554 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7555 { 7556 PC_IS *pcis=(PC_IS*)pc->data; 7557 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7558 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7559 Mat S_j; 7560 PetscInt *used_xadj,*used_adjncy; 7561 PetscBool free_used_adj; 7562 PetscErrorCode ierr; 7563 7564 PetscFunctionBegin; 7565 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7566 free_used_adj = PETSC_FALSE; 7567 if (pcbddc->sub_schurs_layers == -1) { 7568 used_xadj = NULL; 7569 used_adjncy = NULL; 7570 } else { 7571 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7572 used_xadj = pcbddc->mat_graph->xadj; 7573 used_adjncy = pcbddc->mat_graph->adjncy; 7574 } else if (pcbddc->computed_rowadj) { 7575 used_xadj = pcbddc->mat_graph->xadj; 7576 used_adjncy = pcbddc->mat_graph->adjncy; 7577 } else { 7578 PetscBool flg_row=PETSC_FALSE; 7579 const PetscInt *xadj,*adjncy; 7580 PetscInt nvtxs; 7581 7582 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7583 if (flg_row) { 7584 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7585 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7586 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7587 free_used_adj = PETSC_TRUE; 7588 } else { 7589 pcbddc->sub_schurs_layers = -1; 7590 used_xadj = NULL; 7591 used_adjncy = NULL; 7592 } 7593 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7594 } 7595 } 7596 7597 /* setup sub_schurs data */ 7598 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7599 if (!sub_schurs->schur_explicit) { 7600 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7601 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7602 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7603 } else { 7604 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7605 PetscBool isseqaij,need_change = PETSC_FALSE; 7606 PetscInt benign_n; 7607 Mat change = NULL; 7608 Vec scaling = NULL; 7609 IS change_primal = NULL; 7610 7611 if (!pcbddc->use_vertices && reuse_solvers) { 7612 PetscInt n_vertices; 7613 7614 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7615 reuse_solvers = (PetscBool)!n_vertices; 7616 } 7617 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7618 if (!isseqaij) { 7619 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7620 if (matis->A == pcbddc->local_mat) { 7621 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7622 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7623 } else { 7624 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7625 } 7626 } 7627 if (!pcbddc->benign_change_explicit) { 7628 benign_n = pcbddc->benign_n; 7629 } else { 7630 benign_n = 0; 7631 } 7632 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7633 We need a global reduction to avoid possible deadlocks. 7634 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7635 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7636 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7637 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7638 need_change = (PetscBool)(!need_change); 7639 } 7640 /* If the user defines additional constraints, we import them here. 7641 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7642 if (need_change) { 7643 PC_IS *pcisf; 7644 PC_BDDC *pcbddcf; 7645 PC pcf; 7646 7647 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7648 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7649 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7650 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7651 /* hacks */ 7652 pcisf = (PC_IS*)pcf->data; 7653 pcisf->is_B_local = pcis->is_B_local; 7654 pcisf->vec1_N = pcis->vec1_N; 7655 pcisf->BtoNmap = pcis->BtoNmap; 7656 pcisf->n = pcis->n; 7657 pcisf->n_B = pcis->n_B; 7658 pcbddcf = (PC_BDDC*)pcf->data; 7659 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7660 pcbddcf->mat_graph = pcbddc->mat_graph; 7661 pcbddcf->use_faces = PETSC_TRUE; 7662 pcbddcf->use_change_of_basis = PETSC_TRUE; 7663 pcbddcf->use_change_on_faces = PETSC_TRUE; 7664 pcbddcf->use_qr_single = PETSC_TRUE; 7665 pcbddcf->fake_change = PETSC_TRUE; 7666 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7667 /* store information on primal vertices and change of basis (in local numbering) */ 7668 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7669 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7670 change = pcbddcf->ConstraintMatrix; 7671 pcbddcf->ConstraintMatrix = NULL; 7672 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7673 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7674 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7675 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7676 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7677 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7678 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7679 pcf->ops->destroy = NULL; 7680 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7681 } 7682 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7683 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7684 ierr = MatDestroy(&change);CHKERRQ(ierr); 7685 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7686 } 7687 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7688 7689 /* free adjacency */ 7690 if (free_used_adj) { 7691 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7692 } 7693 PetscFunctionReturn(0); 7694 } 7695 7696 #undef __FUNCT__ 7697 #define __FUNCT__ "PCBDDCInitSubSchurs" 7698 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7699 { 7700 PC_IS *pcis=(PC_IS*)pc->data; 7701 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7702 PCBDDCGraph graph; 7703 PetscErrorCode ierr; 7704 7705 PetscFunctionBegin; 7706 /* attach interface graph for determining subsets */ 7707 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7708 IS verticesIS,verticescomm; 7709 PetscInt vsize,*idxs; 7710 7711 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7712 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7713 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7714 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7715 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7716 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7717 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7718 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7719 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7720 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7721 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7722 } else { 7723 graph = pcbddc->mat_graph; 7724 } 7725 /* print some info */ 7726 if (pcbddc->dbg_flag) { 7727 IS vertices; 7728 PetscInt nv,nedges,nfaces; 7729 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7730 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7731 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7732 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7733 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7734 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7735 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7736 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7737 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7738 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7739 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7740 } 7741 7742 /* sub_schurs init */ 7743 if (!pcbddc->sub_schurs) { 7744 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7745 } 7746 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7747 7748 /* free graph struct */ 7749 if (pcbddc->sub_schurs_rebuild) { 7750 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7751 } 7752 PetscFunctionReturn(0); 7753 } 7754 7755 #undef __FUNCT__ 7756 #define __FUNCT__ "PCBDDCCheckOperator" 7757 PetscErrorCode PCBDDCCheckOperator(PC pc) 7758 { 7759 PC_IS *pcis=(PC_IS*)pc->data; 7760 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7761 PetscErrorCode ierr; 7762 7763 PetscFunctionBegin; 7764 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7765 IS zerodiag = NULL; 7766 Mat S_j,B0_B=NULL; 7767 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7768 PetscScalar *p0_check,*array,*array2; 7769 PetscReal norm; 7770 PetscInt i; 7771 7772 /* B0 and B0_B */ 7773 if (zerodiag) { 7774 IS dummy; 7775 7776 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7777 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7778 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7779 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7780 } 7781 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7782 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7783 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7784 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7785 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7786 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7787 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7788 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7789 /* S_j */ 7790 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7791 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7792 7793 /* mimic vector in \widetilde{W}_\Gamma */ 7794 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7795 /* continuous in primal space */ 7796 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7797 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7798 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7799 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7800 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7801 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7802 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7803 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7804 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7805 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7806 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7807 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7808 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7809 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7810 7811 /* assemble rhs for coarse problem */ 7812 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7813 /* local with Schur */ 7814 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7815 if (zerodiag) { 7816 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7817 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7818 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7819 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7820 } 7821 /* sum on primal nodes the local contributions */ 7822 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7823 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7824 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7825 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7826 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7827 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7828 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7829 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7830 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7831 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7832 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7833 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7834 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7835 /* scale primal nodes (BDDC sums contibutions) */ 7836 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7837 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7838 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7839 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7840 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7841 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7842 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7843 /* global: \widetilde{B0}_B w_\Gamma */ 7844 if (zerodiag) { 7845 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7846 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7847 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7848 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7849 } 7850 /* BDDC */ 7851 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7852 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7853 7854 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7855 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7856 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7857 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7858 for (i=0;i<pcbddc->benign_n;i++) { 7859 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7860 } 7861 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7862 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7863 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7864 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7865 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7866 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7867 } 7868 PetscFunctionReturn(0); 7869 } 7870