1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 if (!esize) PetscFunctionReturn(0); 84 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 85 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 86 87 /* gradients */ 88 ptr = work + 5*esize; 89 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 90 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 91 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 92 ierr = MatDestroy(&GE);CHKERRQ(ierr); 93 94 /* constants */ 95 ptr += rsize*csize; 96 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 97 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 99 ierr = MatDestroy(&GE);CHKERRQ(ierr); 100 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 101 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PCBDDCNedelecSupport" 107 PetscErrorCode PCBDDCNedelecSupport(PC pc) 108 { 109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 110 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 111 Mat G,T,conn,lG,lGt,lGis,lGall,lGe; 112 MatNullSpace nnsp; 113 Vec tvec,*quads; 114 PetscSF sfv; 115 ISLocalToGlobalMapping el2g,vl2g,fl2g; 116 MPI_Comm comm; 117 IS lned,primals,allprimals,nedfieldlocal; 118 IS *eedges,*extrows,*extcols,*alleedges; 119 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee; 120 PetscScalar *vals,*work; 121 PetscReal *rwork; 122 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 123 PetscInt ne,nv,Lv,order,n,field; 124 PetscInt n_neigh,*neigh,*n_shared,**shared; 125 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscBool print,eerr,done,lrc[2],conforming; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* test variable order code and print debug info TODO: to be removed */ 133 print = PETSC_FALSE; 134 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 135 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 136 137 /* Return to caller if there are no edges in the decomposition */ 138 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 139 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 140 ierr = ISLocalToGlobalMappingGetSize(el2g,&n);CHKERRQ(ierr); 141 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 142 lrc[0] = PETSC_FALSE; 143 for (i=0;i<n;i++) { 144 if (PetscRealPart(vals[i]) > 2.) { 145 lrc[0] = PETSC_TRUE; 146 break; 147 } 148 } 149 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 150 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 151 if (!lrc[1]) PetscFunctionReturn(0); 152 153 /* Get discrete gradient 154 If it is defined for a subset of dofs, assumes G is given in global ordering for all the dofs */ 155 G = pcbddc->discretegradient; 156 order = pcbddc->nedorder; 157 conforming = pcbddc->conforming; 158 field = pcbddc->nedfield; 159 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 160 if (pcbddc->n_ISForDofsLocal && field > -1) { 161 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 162 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 163 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 164 } else { 165 PetscBool testnedfield = PETSC_FALSE; 166 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 167 if (!testnedfield) { 168 ne = n; 169 nedfieldlocal = NULL; 170 } else { 171 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 172 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 173 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 174 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 175 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 176 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 177 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 178 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 179 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 180 for (i=0,cum=0;i<n;i++) { 181 if (matis->sf_leafdata[i] > 1) { 182 matis->sf_leafdata[cum++] = i; 183 } 184 } 185 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 186 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 187 } 188 } 189 190 if (nedfieldlocal) { /* merge with previous code when testing is done */ 191 IS is; 192 193 /* need to map from the local Nedelec field to local numbering */ 194 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 195 /* need to map from the local Nedelec field to global numbering */ 196 ierr = ISLocalToGlobalMappingApplyIS(el2g,nedfieldlocal,&is);CHKERRQ(ierr); 197 ierr = ISLocalToGlobalMappingCreateIS(is,&el2g);CHKERRQ(ierr); 198 ierr = ISDestroy(&is);CHKERRQ(ierr); 199 } else { 200 ierr = PetscObjectReference((PetscObject)el2g);CHKERRQ(ierr); 201 fl2g = NULL; 202 } 203 204 /* Sanity checks */ 205 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 206 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 207 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 208 209 /* Drop connections for interior edges (this modifies G) */ 210 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 211 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 212 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 213 if (nedfieldlocal) { 214 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 215 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 216 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 217 } else { 218 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 219 } 220 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 221 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 222 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 223 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 224 if (matis->sf_rootdata[i] < 2) { 225 matis->sf_rootdata[cum++] = i + rst; 226 } 227 } 228 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 229 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 230 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 231 232 /* Extract subdomain relevant rows of G */ 233 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 234 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 235 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 236 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 237 ierr = ISDestroy(&lned);CHKERRQ(ierr); 238 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 239 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 240 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 241 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 242 if (print) { 243 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 244 ierr = MatView(lG,NULL);CHKERRQ(ierr); 245 } 246 247 /* SF for nodal communications */ 248 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 249 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 250 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 251 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 252 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 253 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 254 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 257 258 /* Destroy temporary G created in MATIS format */ 259 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 260 261 /* Analyze the edge-nodes connections (duplicate lG) */ 262 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 263 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 264 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 265 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 266 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 267 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 268 /* need to import the boundary specification to ensure the 269 proper detection of coarse edges' endpoints */ 270 if (pcbddc->DirichletBoundariesLocal) { 271 IS is; 272 273 if (fl2g) { 274 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 275 } else { 276 is = pcbddc->DirichletBoundariesLocal; 277 } 278 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 279 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 280 for (i=0;i<cum;i++) { 281 if (idxs[i] >= 0) { 282 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 283 } 284 } 285 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 286 if (fl2g) { 287 ierr = ISDestroy(&is);CHKERRQ(ierr); 288 } 289 } 290 if (pcbddc->NeumannBoundariesLocal) { 291 IS is; 292 293 if (fl2g) { 294 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 295 } else { 296 is = pcbddc->NeumannBoundariesLocal; 297 } 298 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 299 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 300 for (i=0;i<cum;i++) { 301 if (idxs[i] >= 0) { 302 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 303 } 304 } 305 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 306 if (fl2g) { 307 ierr = ISDestroy(&is);CHKERRQ(ierr); 308 } 309 } 310 311 /* need to remove coarse faces' dofs to ensure the 312 proper detection of coarse edges' endpoints */ 313 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 314 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 315 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 316 for (i=1;i<n_neigh;i++) 317 for (j=0;j<n_shared[i];j++) 318 marks[shared[i][j]]++; 319 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 320 for (i=0;i<ne;i++) { 321 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 322 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 323 } 324 } 325 326 if (!conforming) { 327 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 328 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 329 } 330 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 331 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 332 cum = 0; 333 for (i=0;i<ne;i++) { 334 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 335 if (!PetscBTLookup(btee,i)) { 336 marks[cum++] = i; 337 continue; 338 } 339 /* set badly connected edge dofs as primal */ 340 if (!conforming) { 341 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 342 marks[cum++] = i; 343 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 344 for (j=ii[i];j<ii[i+1];j++) { 345 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 346 } 347 } else { 348 /* every edge dofs should be connected trough a certain number of nodal dofs 349 to other edge dofs belonging to coarse edges 350 - at most 2 endpoints 351 - order-1 interior nodal dofs 352 - no undefined nodal dofs (nconn < order) 353 */ 354 PetscInt ends = 0,ints = 0, undef = 0; 355 for (j=ii[i];j<ii[i+1];j++) { 356 PetscInt v = jj[j],k; 357 PetscInt nconn = iit[v+1]-iit[v]; 358 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 359 if (nconn > order) ends++; 360 else if (nconn == order) ints++; 361 else undef++; 362 } 363 if (undef || ends > 2 || ints != order -1) { 364 marks[cum++] = i; 365 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 366 for (j=ii[i];j<ii[i+1];j++) { 367 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 368 } 369 } 370 } 371 } 372 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 373 if (!order && ii[i+1] != ii[i]) { 374 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 375 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 376 } 377 } 378 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 379 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 380 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 381 if (!conforming) { 382 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 383 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 384 } 385 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 386 /* identify splitpoints and corner candidates: TODO variable order */ 387 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 388 if (print) { 389 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 390 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 391 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 392 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 393 } 394 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 395 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 396 for (i=0;i<nv;i++) { 397 PetscInt ord = order, test = ii[i+1]-ii[i]; 398 if (!order) { 399 PetscReal vorder = 0.; 400 401 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 402 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 403 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 404 ord = 1; 405 } 406 #if defined(PETSC_USE_DEBUG) 407 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 408 #endif 409 if (test >= 3*ord) { /* splitpoints */ 410 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 411 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 412 } else if (test == ord) { 413 if (order == 1) { 414 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 415 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 416 } else { 417 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 418 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 419 } 420 } 421 } 422 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 423 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 424 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 425 426 /* Get the local G^T explicitly */ 427 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 428 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 429 if (print) { 430 ierr = PetscObjectSetName((PetscObject)lGt,"initial_lGt");CHKERRQ(ierr); 431 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 432 } 433 434 /* Mark interior nodal dofs */ 435 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 436 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 437 for (i=1;i<n_neigh;i++) { 438 for (j=0;j<n_shared[i];j++) { 439 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 440 } 441 } 442 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 443 444 /* communicate corners and splitpoints */ 445 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 446 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 447 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 448 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 449 450 if (print) { 451 IS tbz; 452 453 cum = 0; 454 for (i=0;i<nv;i++) 455 if (sfvleaves[i]) 456 vmarks[cum++] = i; 457 458 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 459 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 460 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 461 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 462 } 463 464 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 465 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 466 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 467 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 468 469 /* Zero rows of lGt corresponding to identified corners 470 and interior nodal dofs */ 471 cum = 0; 472 for (i=0;i<nv;i++) { 473 if (sfvleaves[i]) { 474 vmarks[cum++] = i; 475 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 476 } 477 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 478 } 479 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 480 if (print) { 481 IS tbz; 482 483 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 484 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 485 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 486 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 487 } 488 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 489 ierr = PetscFree(vmarks);CHKERRQ(ierr); 490 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 491 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 492 493 /* Recompute G */ 494 ierr = MatDestroy(&lG);CHKERRQ(ierr); 495 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 496 if (print) { 497 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 498 ierr = MatView(lG,NULL);CHKERRQ(ierr); 499 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 500 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 501 } 502 503 /* Get primal dofs (if any) */ 504 cum = 0; 505 for (i=0;i<ne;i++) { 506 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 507 } 508 if (fl2g) { 509 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 510 } 511 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 512 if (print) { 513 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 514 ierr = ISView(primals,NULL);CHKERRQ(ierr); 515 } 516 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 517 /* TODO: what if the user passed in some of them ? */ 518 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 519 ierr = ISDestroy(&primals);CHKERRQ(ierr); 520 521 /* Compute edge connectivity */ 522 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 523 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 524 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 525 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 526 if (fl2g) { 527 PetscBT btf; 528 PetscInt *iia,*jja,*iiu,*jju; 529 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 530 531 /* create CSR for all local dofs */ 532 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 533 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 534 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 535 iiu = pcbddc->mat_graph->xadj; 536 jju = pcbddc->mat_graph->adjncy; 537 } else if (pcbddc->use_local_adj) { 538 rest = PETSC_TRUE; 539 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 540 } else { 541 free = PETSC_TRUE; 542 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 543 iiu[0] = 0; 544 for (i=0;i<n;i++) { 545 iiu[i+1] = i+1; 546 jju[i] = -1; 547 } 548 } 549 550 /* import sizes of CSR */ 551 iia[0] = 0; 552 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 553 554 /* overwrite entries corresponding to the Nedelec field */ 555 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 556 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 557 for (i=0;i<ne;i++) { 558 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 559 iia[idxs[i]+1] = ii[i+1]-ii[i]; 560 } 561 562 /* iia in CSR */ 563 for (i=0;i<n;i++) iia[i+1] += iia[i]; 564 565 /* jja in CSR */ 566 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 567 for (i=0;i<n;i++) 568 if (!PetscBTLookup(btf,i)) 569 for (j=0;j<iiu[i+1]-iiu[i];j++) 570 jja[iia[i]+j] = jju[iiu[i]+j]; 571 572 /* map edge dofs connectivity */ 573 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 574 for (i=0;i<ne;i++) { 575 PetscInt e = idxs[i]; 576 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 577 } 578 579 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 580 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 581 if (rest) { 582 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 583 } 584 if (free) { 585 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 586 } 587 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 588 } else { 589 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 590 } 591 } 592 593 /* Analyze interface for edge dofs */ 594 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 595 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 596 597 /* Get coarse edges in the edge space */ 598 pcbddc->mat_graph->twodim = PETSC_FALSE; 599 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 600 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 601 602 if (fl2g) { 603 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 604 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 605 for (i=0;i<nee;i++) { 606 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 607 } 608 } else { 609 eedges = alleedges; 610 primals = allprimals; 611 } 612 613 /* Mark fine edge dofs with their coarse edge id */ 614 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 615 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 616 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 617 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 618 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 619 if (print) { 620 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 621 ierr = ISView(primals,NULL);CHKERRQ(ierr); 622 } 623 624 maxsize = 0; 625 for (i=0;i<nee;i++) { 626 PetscInt size,mark = i+1; 627 628 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 629 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 630 for (j=0;j<size;j++) marks[idxs[j]] = mark; 631 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 632 maxsize = PetscMax(maxsize,size); 633 } 634 635 /* Find coarse edge endpoints */ 636 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 637 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 638 for (i=0;i<nee;i++) { 639 PetscInt mark = i+1,size; 640 641 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 642 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 643 if (print) { 644 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 645 ISView(eedges[i],NULL); 646 } 647 for (j=0;j<size;j++) { 648 PetscInt k, ee = idxs[j]; 649 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 650 for (k=ii[ee];k<ii[ee+1];k++) { 651 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 652 if (PetscBTLookup(btv,jj[k])) { 653 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 654 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 655 if (print) PetscPrintf(PETSC_COMM_SELF," candidate? %d\n",(PetscBool)(PetscBTLookup(btvcand,jj[k]))); 656 PetscInt k2; 657 PetscBool corner = PETSC_FALSE; 658 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 659 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 660 /* it's a corner if either is connected with an edge dof belonging to a different cc or 661 if the edge dof lie on the natural part of the boundary */ 662 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 663 corner = PETSC_TRUE; 664 break; 665 } 666 } 667 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 668 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 669 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 670 } else { 671 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 672 } 673 } 674 } 675 } 676 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 677 } 678 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 679 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 680 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 681 682 /* Reset marked primal dofs */ 683 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 684 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 685 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 686 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 687 688 /* Compute extended cols indices */ 689 ierr = MatGetRowIJ(lG ,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 690 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 691 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 692 i *= maxsize; 693 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 694 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 695 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 696 eerr = PETSC_FALSE; 697 for (i=0;i<nee;i++) { 698 PetscInt size; 699 700 cum = 0; 701 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 702 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 703 for (j=0;j<size;j++) { 704 PetscInt k,ee = idxs[j]; 705 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 706 } 707 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 708 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 709 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 710 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 711 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 712 /* it may happen that endpoints are not defined at this point 713 if it is the case, mark this edge for a second pass */ 714 if (cum != size -1) { 715 emarks[i] = 1; 716 if (print) { 717 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 718 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 719 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 720 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 721 } 722 eerr = PETSC_TRUE; 723 } 724 } 725 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 726 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 727 if (done) { 728 PetscInt *newprimals; 729 730 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 731 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 732 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 733 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 734 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 735 for (i=0;i<nee;i++) { 736 if (emarks[i]) { 737 PetscInt size,mark = i+1; 738 739 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 740 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 741 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 742 for (j=0;j<size;j++) { 743 PetscInt k,ee = idxs[j]; 744 for (k=ii[ee];k<ii[ee+1];k++) { 745 /* set all candidates located on the edge as corners */ 746 if (PetscBTLookup(btvcand,jj[k])) { 747 PetscInt k2,vv = jj[k]; 748 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 749 /* set all edge dofs connected to candidate as primals */ 750 for (k2=iit[vv];k2<iit[vv+1];k2++) { 751 if (marks[jjt[k2]] == mark) { 752 PetscInt k3,ee2 = jjt[k2]; 753 newprimals[cum++] = ee2; 754 /* finally set the new corners */ 755 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 756 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 757 } 758 } 759 } 760 } 761 } 762 } 763 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 764 } 765 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 766 } 767 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 768 if (fl2g) { 769 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 770 ierr = ISDestroy(&primals);CHKERRQ(ierr); 771 for (i=0;i<nee;i++) { 772 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 773 } 774 ierr = PetscFree(eedges);CHKERRQ(ierr); 775 } 776 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 777 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 778 ierr = PetscFree(newprimals);CHKERRQ(ierr); 779 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 780 ierr = ISDestroy(&primals);CHKERRQ(ierr); 781 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 782 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 783 if (fl2g) { 784 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 785 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 786 for (i=0;i<nee;i++) { 787 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 788 } 789 } else { 790 eedges = alleedges; 791 primals = allprimals; 792 } 793 794 /* Mark again */ 795 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 796 for (i=0;i<nee;i++) { 797 PetscInt size,mark = i+1; 798 799 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 800 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 801 for (j=0;j<size;j++) marks[idxs[j]] = mark; 802 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 803 } 804 if (print) { 805 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 806 ierr = ISView(primals,NULL);CHKERRQ(ierr); 807 } 808 809 /* Recompute extended cols */ 810 eerr = PETSC_FALSE; 811 for (i=0;i<nee;i++) { 812 PetscInt size; 813 814 cum = 0; 815 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 816 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 817 for (j=0;j<size;j++) { 818 PetscInt k,ee = idxs[j]; 819 for (k=ii[ee];k<ii[ee+1];k++) { 820 if (!PetscBTLookup(btv,jj[k])) { 821 extrow[cum++] = jj[k]; 822 } 823 } 824 } 825 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 826 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 827 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 828 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 829 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 830 if (cum != size -1) { 831 if (print) { 832 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 833 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 834 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 835 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 836 } 837 eerr = PETSC_TRUE; 838 } 839 } 840 } 841 ierr = MatRestoreRowIJ( lG,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 842 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 843 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 844 ierr = PetscFree(emarks);CHKERRQ(ierr); 845 /* an error should not occur at this point */ 846 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 847 848 /* Check the number of endpoints */ 849 /* TODO: fix case for circular edge */ 850 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 851 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 852 for (i=0;i<nee;i++) { 853 PetscInt size, found = 0; 854 855 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 856 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 857 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 858 for (j=0;j<size;j++) { 859 PetscInt k,ee = idxs[j]; 860 for (k=ii[ee];k<ii[ee+1];k++) { 861 PetscInt vv = jj[k]; 862 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 863 found++; 864 } 865 } 866 } 867 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 868 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 869 } 870 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 871 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 872 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 873 874 #if defined(PETSC_USE_DEBUG) 875 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 876 not interfere with neighbouring coarse edges */ 877 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 878 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 879 for (i=0;i<nv;i++) { 880 PetscInt emax = 0,eemax = 0; 881 882 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 883 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 884 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 885 for (j=1;j<nee+1;j++) { 886 if (emax < emarks[j]) { 887 emax = emarks[j]; 888 eemax = j; 889 } 890 } 891 /* not relevant for edges */ 892 if (!eemax) continue; 893 894 for (j=ii[i];j<ii[i+1];j++) { 895 if (marks[jj[j]] && marks[jj[j]] != eemax) { 896 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 897 } 898 } 899 } 900 ierr = PetscFree(emarks);CHKERRQ(ierr); 901 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 902 #endif 903 904 /* Compute extended rows indices for edge blocks of the change of basis */ 905 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 906 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 907 extmem *= maxsize; 908 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 909 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 910 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 911 for (i=0;i<nv;i++) { 912 PetscInt mark = 0,size,start; 913 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 914 for (j=ii[i];j<ii[i+1];j++) 915 if (marks[jj[j]] && !mark) 916 mark = marks[jj[j]]; 917 918 /* not relevant */ 919 if (!mark) continue; 920 921 /* import extended row */ 922 mark--; 923 start = mark*extmem+extrowcum[mark]; 924 size = ii[i+1]-ii[i]; 925 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 926 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 927 extrowcum[mark] += size; 928 } 929 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 930 cum = 0; 931 for (i=0;i<nee;i++) { 932 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 933 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 934 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 935 cum = PetscMax(cum,size); 936 } 937 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 938 ierr = PetscFree(marks);CHKERRQ(ierr); 939 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 940 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 941 942 /* Workspace for lapack inner calls and VecSetValues */ 943 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 944 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 945 for (i=0;i<maxsize;i++) vals[i] = 1.; 946 947 /* Create vectors for quadrature rules */ 948 /* TODO preserve other quadratures */ 949 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 950 for (i=0;i<nquads;i++) { 951 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 952 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 953 } 954 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 955 956 /* Create change of basis matrix (preallocation can be improved) */ 957 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 958 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 959 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 960 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 961 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 962 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 963 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 964 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 965 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 966 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 967 968 /* Defaults to identity */ 969 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 970 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 971 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 972 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 973 974 for (i=0;i<nee;i++) { 975 Mat Gins = NULL, GKins = NULL; 976 977 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 978 if (Gins && GKins) { 979 PetscScalar *data; 980 const PetscInt *rows,*cols; 981 PetscInt nrh,nch,nrc,ncc; 982 983 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 984 /* H1 */ 985 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 986 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 987 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 988 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 989 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 990 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 991 /* complement */ 992 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 993 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 994 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 995 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 996 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 997 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 998 /* Gins kernel quadratures */ 999 for (j=0;j<ncc;j++) { 1000 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1001 } 1002 /* H1 average */ 1003 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1004 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1005 } 1006 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1007 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1008 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1009 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1010 } 1011 1012 /* Start assembling */ 1013 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1014 for (i=0;i<nquads;i++) { 1015 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1016 } 1017 1018 /* Free */ 1019 if (fl2g) { 1020 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1021 for (i=0;i<nee;i++) { 1022 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1023 } 1024 ierr = PetscFree(eedges);CHKERRQ(ierr); 1025 } 1026 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1027 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1028 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1029 ierr = PetscFree(extrow);CHKERRQ(ierr); 1030 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1031 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1032 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1033 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1034 ierr = PetscFree(vals);CHKERRQ(ierr); 1035 ierr = PetscFree(extrows);CHKERRQ(ierr); 1036 ierr = PetscFree(extcols);CHKERRQ(ierr); 1037 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1038 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1039 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1040 1041 /* Complete assembling */ 1042 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1043 for (i=0;i<nquads;i++) { 1044 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1045 } 1046 for (i=0;i<nquads;i++) { 1047 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1048 } 1049 ierr = PetscFree(quads);CHKERRQ(ierr); 1050 1051 /* set change of basis */ 1052 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1053 ierr = MatDestroy(&T);CHKERRQ(ierr); 1054 1055 /* set quadratures */ 1056 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1057 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1058 1059 PetscFunctionReturn(0); 1060 } 1061 1062 /* the near-null space of BDDC carries information on quadrature weights, 1063 and these can be collinear -> so cheat with MatNullSpaceCreate 1064 and create a suitable set of basis vectors first */ 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1067 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1068 { 1069 PetscErrorCode ierr; 1070 PetscInt i; 1071 1072 PetscFunctionBegin; 1073 for (i=0;i<nvecs;i++) { 1074 PetscInt first,last; 1075 1076 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1077 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1078 if (i>=first && i < last) { 1079 PetscScalar *data; 1080 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1081 if (!has_const) { 1082 data[i-first] = 1.; 1083 } else { 1084 data[2*i-first] = 1./PetscSqrtReal(2.); 1085 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1086 } 1087 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1088 } 1089 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1090 } 1091 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1092 for (i=0;i<nvecs;i++) { /* reset vectors */ 1093 PetscInt first,last; 1094 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1095 if (i>=first && i < last) { 1096 PetscScalar *data; 1097 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1098 if (!has_const) { 1099 data[i-first] = 0.; 1100 } else { 1101 data[2*i-first] = 0.; 1102 data[2*i-first+1] = 0.; 1103 } 1104 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1105 } 1106 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1107 } 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1113 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1114 { 1115 Mat loc_divudotp; 1116 Vec p,v,vins,quad_vec,*quad_vecs; 1117 ISLocalToGlobalMapping map; 1118 IS *faces,*edges; 1119 PetscScalar *vals; 1120 const PetscScalar *array; 1121 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1122 PetscMPIInt rank; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1127 if (graph->twodim) { 1128 lmaxneighs = 2; 1129 } else { 1130 lmaxneighs = 1; 1131 for (i=0;i<ne;i++) { 1132 const PetscInt *idxs; 1133 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1134 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1135 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1136 } 1137 lmaxneighs++; /* graph count does not include self */ 1138 } 1139 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1140 maxsize = 0; 1141 for (i=0;i<ne;i++) { 1142 PetscInt nn; 1143 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1144 maxsize = PetscMax(maxsize,nn); 1145 } 1146 for (i=0;i<nf;i++) { 1147 PetscInt nn; 1148 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1149 maxsize = PetscMax(maxsize,nn); 1150 } 1151 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1152 /* create vectors to hold quadrature weights */ 1153 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1154 if (!transpose) { 1155 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1156 } else { 1157 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1158 } 1159 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1160 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1161 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1162 for (i=0;i<maxneighs;i++) { 1163 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1164 } 1165 1166 /* compute local quad vec */ 1167 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1168 if (!transpose) { 1169 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1170 } else { 1171 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1172 } 1173 ierr = VecSet(p,1.);CHKERRQ(ierr); 1174 if (!transpose) { 1175 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1176 } else { 1177 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1178 } 1179 if (vl2l) { 1180 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1181 } else { 1182 vins = v; 1183 } 1184 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1185 ierr = VecDestroy(&p);CHKERRQ(ierr); 1186 1187 /* insert in global quadrature vecs */ 1188 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1189 for (i=0;i<nf;i++) { 1190 const PetscInt *idxs; 1191 PetscInt idx,nn,j; 1192 1193 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1194 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1195 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1196 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1197 idx = -(idx+1); 1198 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1199 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1200 } 1201 for (i=0;i<ne;i++) { 1202 const PetscInt *idxs; 1203 PetscInt idx,nn,j; 1204 1205 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1206 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1207 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1208 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1209 idx = -(idx+1); 1210 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1211 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1212 } 1213 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1214 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1215 if (vl2l) { 1216 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1217 } 1218 ierr = VecDestroy(&v);CHKERRQ(ierr); 1219 ierr = PetscFree(vals);CHKERRQ(ierr); 1220 1221 /* assemble near null space */ 1222 for (i=0;i<maxneighs;i++) { 1223 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1224 } 1225 for (i=0;i<maxneighs;i++) { 1226 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1227 } 1228 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1235 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1236 { 1237 PetscErrorCode ierr; 1238 Vec local,global; 1239 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1240 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1241 1242 PetscFunctionBegin; 1243 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1244 /* need to convert from global to local topology information and remove references to information in global ordering */ 1245 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1246 if (pcbddc->user_provided_isfordofs) { 1247 if (pcbddc->n_ISForDofs) { 1248 PetscInt i; 1249 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1250 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1251 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1252 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1253 } 1254 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1255 pcbddc->n_ISForDofs = 0; 1256 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1257 } 1258 } else { 1259 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1260 PetscInt i, n = matis->A->rmap->n; 1261 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1262 if (i > 1) { 1263 pcbddc->n_ISForDofsLocal = i; 1264 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1265 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1266 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1267 } 1268 } 1269 } 1270 } 1271 1272 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1273 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1274 } 1275 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1276 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1277 } 1278 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1279 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1280 } 1281 ierr = VecDestroy(&global);CHKERRQ(ierr); 1282 ierr = VecDestroy(&local);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1288 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1289 { 1290 PC_IS *pcis = (PC_IS*)(pc->data); 1291 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1292 PetscErrorCode ierr; 1293 1294 PetscFunctionBegin; 1295 if (!pcbddc->benign_have_null) { 1296 PetscFunctionReturn(0); 1297 } 1298 if (pcbddc->ChangeOfBasisMatrix) { 1299 Vec swap; 1300 1301 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1302 swap = pcbddc->work_change; 1303 pcbddc->work_change = r; 1304 r = swap; 1305 } 1306 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1307 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1308 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1309 ierr = VecSet(z,0.);CHKERRQ(ierr); 1310 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1311 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1312 if (pcbddc->ChangeOfBasisMatrix) { 1313 Vec swap; 1314 1315 swap = r; 1316 r = pcbddc->work_change; 1317 pcbddc->work_change = swap; 1318 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1319 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1320 } 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1326 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1327 { 1328 PCBDDCBenignMatMult_ctx ctx; 1329 PetscErrorCode ierr; 1330 PetscBool apply_right,apply_left,reset_x; 1331 1332 PetscFunctionBegin; 1333 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1334 if (transpose) { 1335 apply_right = ctx->apply_left; 1336 apply_left = ctx->apply_right; 1337 } else { 1338 apply_right = ctx->apply_right; 1339 apply_left = ctx->apply_left; 1340 } 1341 reset_x = PETSC_FALSE; 1342 if (apply_right) { 1343 const PetscScalar *ax; 1344 PetscInt nl,i; 1345 1346 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1347 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1348 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1349 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1350 for (i=0;i<ctx->benign_n;i++) { 1351 PetscScalar sum,val; 1352 const PetscInt *idxs; 1353 PetscInt nz,j; 1354 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1355 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1356 sum = 0.; 1357 if (ctx->apply_p0) { 1358 val = ctx->work[idxs[nz-1]]; 1359 for (j=0;j<nz-1;j++) { 1360 sum += ctx->work[idxs[j]]; 1361 ctx->work[idxs[j]] += val; 1362 } 1363 } else { 1364 for (j=0;j<nz-1;j++) { 1365 sum += ctx->work[idxs[j]]; 1366 } 1367 } 1368 ctx->work[idxs[nz-1]] -= sum; 1369 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1370 } 1371 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1372 reset_x = PETSC_TRUE; 1373 } 1374 if (transpose) { 1375 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1376 } else { 1377 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1378 } 1379 if (reset_x) { 1380 ierr = VecResetArray(x);CHKERRQ(ierr); 1381 } 1382 if (apply_left) { 1383 PetscScalar *ay; 1384 PetscInt i; 1385 1386 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1387 for (i=0;i<ctx->benign_n;i++) { 1388 PetscScalar sum,val; 1389 const PetscInt *idxs; 1390 PetscInt nz,j; 1391 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1392 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1393 val = -ay[idxs[nz-1]]; 1394 if (ctx->apply_p0) { 1395 sum = 0.; 1396 for (j=0;j<nz-1;j++) { 1397 sum += ay[idxs[j]]; 1398 ay[idxs[j]] += val; 1399 } 1400 ay[idxs[nz-1]] += sum; 1401 } else { 1402 for (j=0;j<nz-1;j++) { 1403 ay[idxs[j]] += val; 1404 } 1405 ay[idxs[nz-1]] = 0.; 1406 } 1407 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1408 } 1409 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1416 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1417 { 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1427 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1428 { 1429 PetscErrorCode ierr; 1430 1431 PetscFunctionBegin; 1432 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "PCBDDCBenignShellMat" 1438 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1439 { 1440 PC_IS *pcis = (PC_IS*)pc->data; 1441 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1442 PCBDDCBenignMatMult_ctx ctx; 1443 PetscErrorCode ierr; 1444 1445 PetscFunctionBegin; 1446 if (!restore) { 1447 Mat A_IB,A_BI; 1448 PetscScalar *work; 1449 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1450 1451 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1452 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1453 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1454 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1455 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1456 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1457 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1458 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1459 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1460 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1461 ctx->apply_left = PETSC_TRUE; 1462 ctx->apply_right = PETSC_FALSE; 1463 ctx->apply_p0 = PETSC_FALSE; 1464 ctx->benign_n = pcbddc->benign_n; 1465 if (reuse) { 1466 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1467 ctx->free = PETSC_FALSE; 1468 } else { /* TODO: could be optimized for successive solves */ 1469 ISLocalToGlobalMapping N_to_D; 1470 PetscInt i; 1471 1472 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1473 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1474 for (i=0;i<pcbddc->benign_n;i++) { 1475 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1476 } 1477 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1478 ctx->free = PETSC_TRUE; 1479 } 1480 ctx->A = pcis->A_IB; 1481 ctx->work = work; 1482 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1483 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1484 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1485 pcis->A_IB = A_IB; 1486 1487 /* A_BI as A_IB^T */ 1488 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1489 pcbddc->benign_original_mat = pcis->A_BI; 1490 pcis->A_BI = A_BI; 1491 } else { 1492 if (!pcbddc->benign_original_mat) { 1493 PetscFunctionReturn(0); 1494 } 1495 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1496 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1497 pcis->A_IB = ctx->A; 1498 ctx->A = NULL; 1499 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1500 pcis->A_BI = pcbddc->benign_original_mat; 1501 pcbddc->benign_original_mat = NULL; 1502 if (ctx->free) { 1503 PetscInt i; 1504 for (i=0;i<ctx->benign_n;i++) { 1505 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1506 } 1507 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1508 } 1509 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1510 ierr = PetscFree(ctx);CHKERRQ(ierr); 1511 } 1512 PetscFunctionReturn(0); 1513 } 1514 1515 /* used just in bddc debug mode */ 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "PCBDDCBenignProject" 1518 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1519 { 1520 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1521 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1522 Mat An; 1523 PetscErrorCode ierr; 1524 1525 PetscFunctionBegin; 1526 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1527 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1528 if (is1) { 1529 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1530 ierr = MatDestroy(&An);CHKERRQ(ierr); 1531 } else { 1532 *B = An; 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 /* TODO: add reuse flag */ 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatSeqAIJCompress" 1540 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1541 { 1542 Mat Bt; 1543 PetscScalar *a,*bdata; 1544 const PetscInt *ii,*ij; 1545 PetscInt m,n,i,nnz,*bii,*bij; 1546 PetscBool flg_row; 1547 PetscErrorCode ierr; 1548 1549 PetscFunctionBegin; 1550 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1551 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1552 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1553 nnz = n; 1554 for (i=0;i<ii[n];i++) { 1555 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1556 } 1557 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1558 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1559 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1560 nnz = 0; 1561 bii[0] = 0; 1562 for (i=0;i<n;i++) { 1563 PetscInt j; 1564 for (j=ii[i];j<ii[i+1];j++) { 1565 PetscScalar entry = a[j]; 1566 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1567 bij[nnz] = ij[j]; 1568 bdata[nnz] = entry; 1569 nnz++; 1570 } 1571 } 1572 bii[i+1] = nnz; 1573 } 1574 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1575 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1576 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1577 { 1578 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1579 b->free_a = PETSC_TRUE; 1580 b->free_ij = PETSC_TRUE; 1581 } 1582 *B = Bt; 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatDetectDisconnectedComponents" 1588 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1589 { 1590 Mat B; 1591 IS is_dummy,*cc_n; 1592 ISLocalToGlobalMapping l2gmap_dummy; 1593 PCBDDCGraph graph; 1594 PetscInt i,n; 1595 PetscInt *xadj,*adjncy; 1596 PetscInt *xadj_filtered,*adjncy_filtered; 1597 PetscBool flg_row,isseqaij; 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 if (!A->rmap->N || !A->cmap->N) { 1602 *ncc = 0; 1603 *cc = NULL; 1604 PetscFunctionReturn(0); 1605 } 1606 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1607 if (!isseqaij && filter) { 1608 PetscBool isseqdense; 1609 1610 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1611 if (!isseqdense) { 1612 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1613 } else { /* TODO: rectangular case and LDA */ 1614 PetscScalar *array; 1615 PetscReal chop=1.e-6; 1616 1617 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1618 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1619 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1620 for (i=0;i<n;i++) { 1621 PetscInt j; 1622 for (j=i+1;j<n;j++) { 1623 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1624 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1625 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1626 } 1627 } 1628 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1629 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1630 } 1631 } else { 1632 B = A; 1633 } 1634 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1635 1636 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1637 if (filter) { 1638 PetscScalar *data; 1639 PetscInt j,cum; 1640 1641 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1642 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1643 cum = 0; 1644 for (i=0;i<n;i++) { 1645 PetscInt t; 1646 1647 for (j=xadj[i];j<xadj[i+1];j++) { 1648 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1649 continue; 1650 } 1651 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1652 } 1653 t = xadj_filtered[i]; 1654 xadj_filtered[i] = cum; 1655 cum += t; 1656 } 1657 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1658 } else { 1659 xadj_filtered = NULL; 1660 adjncy_filtered = NULL; 1661 } 1662 1663 /* compute local connected components using PCBDDCGraph */ 1664 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1665 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1666 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1667 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1668 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1669 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1670 if (xadj_filtered) { 1671 graph->xadj = xadj_filtered; 1672 graph->adjncy = adjncy_filtered; 1673 } else { 1674 graph->xadj = xadj; 1675 graph->adjncy = adjncy; 1676 } 1677 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1678 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1679 /* partial clean up */ 1680 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1681 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1682 if (A != B) { 1683 ierr = MatDestroy(&B);CHKERRQ(ierr); 1684 } 1685 1686 /* get back data */ 1687 if (ncc) *ncc = graph->ncc; 1688 if (cc) { 1689 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1690 for (i=0;i<graph->ncc;i++) { 1691 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1692 } 1693 *cc = cc_n; 1694 } 1695 /* clean up graph */ 1696 graph->xadj = 0; 1697 graph->adjncy = 0; 1698 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "PCBDDCBenignCheck" 1704 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1705 { 1706 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1707 PC_IS* pcis = (PC_IS*)(pc->data); 1708 IS dirIS = NULL; 1709 PetscInt i; 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1714 if (zerodiag) { 1715 Mat A; 1716 Vec vec3_N; 1717 PetscScalar *vals; 1718 const PetscInt *idxs; 1719 PetscInt nz,*count; 1720 1721 /* p0 */ 1722 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1723 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1724 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1725 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1726 for (i=0;i<nz;i++) vals[i] = 1.; 1727 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1728 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1729 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1730 /* v_I */ 1731 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1732 for (i=0;i<nz;i++) vals[i] = 0.; 1733 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1734 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1735 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1736 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1737 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1738 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1739 if (dirIS) { 1740 PetscInt n; 1741 1742 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1743 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1744 for (i=0;i<n;i++) vals[i] = 0.; 1745 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1746 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1747 } 1748 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1749 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1750 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1751 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1752 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1753 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1754 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1755 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1756 ierr = PetscFree(vals);CHKERRQ(ierr); 1757 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1758 1759 /* there should not be any pressure dofs lying on the interface */ 1760 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1761 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1762 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1763 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1764 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1765 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1766 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1767 ierr = PetscFree(count);CHKERRQ(ierr); 1768 } 1769 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1770 1771 /* check PCBDDCBenignGetOrSetP0 */ 1772 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1773 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1774 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1775 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1776 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1777 for (i=0;i<pcbddc->benign_n;i++) { 1778 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1779 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1786 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1787 { 1788 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1789 IS pressures,zerodiag,*zerodiag_subs; 1790 PetscInt nz,n; 1791 PetscInt *interior_dofs,n_interior_dofs; 1792 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1793 PetscErrorCode ierr; 1794 1795 PetscFunctionBegin; 1796 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1797 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1798 for (n=0;n<pcbddc->benign_n;n++) { 1799 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1800 } 1801 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1802 pcbddc->benign_n = 0; 1803 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1804 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1805 Checks if all the pressure dofs in each subdomain have a zero diagonal 1806 If not, a change of basis on pressures is not needed 1807 since the local Schur complements are already SPD 1808 */ 1809 has_null_pressures = PETSC_TRUE; 1810 have_null = PETSC_TRUE; 1811 if (pcbddc->n_ISForDofsLocal) { 1812 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1813 1814 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1815 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1816 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1817 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1818 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1819 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1820 if (!sorted) { 1821 ierr = ISSort(pressures);CHKERRQ(ierr); 1822 } 1823 } else { 1824 pressures = NULL; 1825 } 1826 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1827 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1828 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1829 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1830 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1831 if (!sorted) { 1832 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1833 } 1834 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1835 if (!nz) { 1836 if (n) have_null = PETSC_FALSE; 1837 has_null_pressures = PETSC_FALSE; 1838 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1839 } 1840 recompute_zerodiag = PETSC_FALSE; 1841 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1842 zerodiag_subs = NULL; 1843 pcbddc->benign_n = 0; 1844 n_interior_dofs = 0; 1845 interior_dofs = NULL; 1846 if (pcbddc->current_level) { /* need to compute interior nodes */ 1847 PetscInt n,i,j; 1848 PetscInt n_neigh,*neigh,*n_shared,**shared; 1849 PetscInt *iwork; 1850 1851 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1852 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1853 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1854 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1855 for (i=1;i<n_neigh;i++) 1856 for (j=0;j<n_shared[i];j++) 1857 iwork[shared[i][j]] += 1; 1858 for (i=0;i<n;i++) 1859 if (!iwork[i]) 1860 interior_dofs[n_interior_dofs++] = i; 1861 ierr = PetscFree(iwork);CHKERRQ(ierr); 1862 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1863 } 1864 if (has_null_pressures) { 1865 IS *subs; 1866 PetscInt nsubs,i,j,nl; 1867 const PetscInt *idxs; 1868 PetscScalar *array; 1869 Vec *work; 1870 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1871 1872 subs = pcbddc->local_subs; 1873 nsubs = pcbddc->n_local_subs; 1874 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1875 if (pcbddc->current_level) { 1876 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1877 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1878 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1879 /* work[0] = 1_p */ 1880 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1881 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1882 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1883 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1884 /* work[0] = 1_v */ 1885 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1886 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1887 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1888 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1889 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1890 } 1891 if (nsubs > 1) { 1892 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1893 for (i=0;i<nsubs;i++) { 1894 ISLocalToGlobalMapping l2g; 1895 IS t_zerodiag_subs; 1896 PetscInt nl; 1897 1898 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1899 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1900 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1901 if (nl) { 1902 PetscBool valid = PETSC_TRUE; 1903 1904 if (pcbddc->current_level) { 1905 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1906 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1907 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1908 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1909 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1910 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1911 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1912 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1913 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1914 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1915 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1916 for (j=0;j<n_interior_dofs;j++) { 1917 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1918 valid = PETSC_FALSE; 1919 break; 1920 } 1921 } 1922 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1923 } 1924 if (valid && pcbddc->NeumannBoundariesLocal) { 1925 IS t_bc; 1926 PetscInt nzb; 1927 1928 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1929 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1930 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1931 if (nzb) valid = PETSC_FALSE; 1932 } 1933 if (valid && pressures) { 1934 IS t_pressure_subs; 1935 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1936 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1937 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1938 } 1939 if (valid) { 1940 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1941 pcbddc->benign_n++; 1942 } else { 1943 recompute_zerodiag = PETSC_TRUE; 1944 } 1945 } 1946 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1947 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1948 } 1949 } else { /* there's just one subdomain (or zero if they have not been detected */ 1950 PetscBool valid = PETSC_TRUE; 1951 1952 if (pcbddc->NeumannBoundariesLocal) { 1953 PetscInt nzb; 1954 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1955 if (nzb) valid = PETSC_FALSE; 1956 } 1957 if (valid && pressures) { 1958 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1959 } 1960 if (valid && pcbddc->current_level) { 1961 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1962 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1963 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1964 for (j=0;j<n_interior_dofs;j++) { 1965 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1966 valid = PETSC_FALSE; 1967 break; 1968 } 1969 } 1970 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1971 } 1972 if (valid) { 1973 pcbddc->benign_n = 1; 1974 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1975 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1976 zerodiag_subs[0] = zerodiag; 1977 } 1978 } 1979 if (pcbddc->current_level) { 1980 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1981 } 1982 } 1983 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1984 1985 if (!pcbddc->benign_n) { 1986 PetscInt n; 1987 1988 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1989 recompute_zerodiag = PETSC_FALSE; 1990 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1991 if (n) { 1992 has_null_pressures = PETSC_FALSE; 1993 have_null = PETSC_FALSE; 1994 } 1995 } 1996 1997 /* final check for null pressures */ 1998 if (zerodiag && pressures) { 1999 PetscInt nz,np; 2000 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2001 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2002 if (nz != np) have_null = PETSC_FALSE; 2003 } 2004 2005 if (recompute_zerodiag) { 2006 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2007 if (pcbddc->benign_n == 1) { 2008 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2009 zerodiag = zerodiag_subs[0]; 2010 } else { 2011 PetscInt i,nzn,*new_idxs; 2012 2013 nzn = 0; 2014 for (i=0;i<pcbddc->benign_n;i++) { 2015 PetscInt ns; 2016 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2017 nzn += ns; 2018 } 2019 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2020 nzn = 0; 2021 for (i=0;i<pcbddc->benign_n;i++) { 2022 PetscInt ns,*idxs; 2023 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2024 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2025 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2026 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2027 nzn += ns; 2028 } 2029 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2030 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2031 } 2032 have_null = PETSC_FALSE; 2033 } 2034 2035 /* Prepare matrix to compute no-net-flux */ 2036 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2037 Mat A,loc_divudotp; 2038 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2039 IS row,col,isused = NULL; 2040 PetscInt M,N,n,st,n_isused; 2041 2042 if (pressures) { 2043 isused = pressures; 2044 } else { 2045 isused = zerodiag; 2046 } 2047 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2048 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2049 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2050 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2051 n_isused = 0; 2052 if (isused) { 2053 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2054 } 2055 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2056 st = st-n_isused; 2057 if (n) { 2058 const PetscInt *gidxs; 2059 2060 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2061 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2062 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2063 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2064 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2065 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2066 } else { 2067 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2068 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2069 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2070 } 2071 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2072 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2073 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2074 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2075 ierr = ISDestroy(&row);CHKERRQ(ierr); 2076 ierr = ISDestroy(&col);CHKERRQ(ierr); 2077 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2078 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2079 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2080 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2081 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2082 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2083 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2084 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2085 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2086 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 } 2088 2089 /* change of basis and p0 dofs */ 2090 if (has_null_pressures) { 2091 IS zerodiagc; 2092 const PetscInt *idxs,*idxsc; 2093 PetscInt i,s,*nnz; 2094 2095 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2096 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2097 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2098 /* local change of basis for pressures */ 2099 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2100 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2101 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2102 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2103 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2104 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2105 for (i=0;i<pcbddc->benign_n;i++) { 2106 PetscInt nzs,j; 2107 2108 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2109 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2110 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2111 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2112 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2113 } 2114 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2115 ierr = PetscFree(nnz);CHKERRQ(ierr); 2116 /* set identity on velocities */ 2117 for (i=0;i<n-nz;i++) { 2118 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2119 } 2120 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2121 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2122 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2123 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2124 /* set change on pressures */ 2125 for (s=0;s<pcbddc->benign_n;s++) { 2126 PetscScalar *array; 2127 PetscInt nzs; 2128 2129 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2130 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2131 for (i=0;i<nzs-1;i++) { 2132 PetscScalar vals[2]; 2133 PetscInt cols[2]; 2134 2135 cols[0] = idxs[i]; 2136 cols[1] = idxs[nzs-1]; 2137 vals[0] = 1.; 2138 vals[1] = 1.; 2139 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2140 } 2141 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2142 for (i=0;i<nzs-1;i++) array[i] = -1.; 2143 array[nzs-1] = 1.; 2144 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2145 /* store local idxs for p0 */ 2146 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2147 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2148 ierr = PetscFree(array);CHKERRQ(ierr); 2149 } 2150 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2151 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2152 /* project if needed */ 2153 if (pcbddc->benign_change_explicit) { 2154 Mat M; 2155 2156 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2157 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2158 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2159 ierr = MatDestroy(&M);CHKERRQ(ierr); 2160 } 2161 /* store global idxs for p0 */ 2162 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2163 } 2164 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2165 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2166 2167 /* determines if the coarse solver will be singular or not */ 2168 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2169 /* determines if the problem has subdomains with 0 pressure block */ 2170 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2171 *zerodiaglocal = zerodiag; 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2177 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2178 { 2179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2180 PetscScalar *array; 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 if (!pcbddc->benign_sf) { 2185 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2186 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2187 } 2188 if (get) { 2189 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2190 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2191 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2192 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2193 } else { 2194 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2195 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2196 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2197 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2198 } 2199 PetscFunctionReturn(0); 2200 } 2201 2202 #undef __FUNCT__ 2203 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2204 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2205 { 2206 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2207 PetscErrorCode ierr; 2208 2209 PetscFunctionBegin; 2210 /* TODO: add error checking 2211 - avoid nested pop (or push) calls. 2212 - cannot push before pop. 2213 - cannot call this if pcbddc->local_mat is NULL 2214 */ 2215 if (!pcbddc->benign_n) { 2216 PetscFunctionReturn(0); 2217 } 2218 if (pop) { 2219 if (pcbddc->benign_change_explicit) { 2220 IS is_p0; 2221 MatReuse reuse; 2222 2223 /* extract B_0 */ 2224 reuse = MAT_INITIAL_MATRIX; 2225 if (pcbddc->benign_B0) { 2226 reuse = MAT_REUSE_MATRIX; 2227 } 2228 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2229 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2230 /* remove rows and cols from local problem */ 2231 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2232 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2233 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2234 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2235 } else { 2236 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2237 PetscScalar *vals; 2238 PetscInt i,n,*idxs_ins; 2239 2240 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2241 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2242 if (!pcbddc->benign_B0) { 2243 PetscInt *nnz; 2244 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2245 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2246 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2247 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2248 for (i=0;i<pcbddc->benign_n;i++) { 2249 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2250 nnz[i] = n - nnz[i]; 2251 } 2252 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2253 ierr = PetscFree(nnz);CHKERRQ(ierr); 2254 } 2255 2256 for (i=0;i<pcbddc->benign_n;i++) { 2257 PetscScalar *array; 2258 PetscInt *idxs,j,nz,cum; 2259 2260 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2261 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2262 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2263 for (j=0;j<nz;j++) vals[j] = 1.; 2264 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2265 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2266 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2267 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2268 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2269 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2270 cum = 0; 2271 for (j=0;j<n;j++) { 2272 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2273 vals[cum] = array[j]; 2274 idxs_ins[cum] = j; 2275 cum++; 2276 } 2277 } 2278 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2279 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2280 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2281 } 2282 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2283 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2284 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2285 } 2286 } else { /* push */ 2287 if (pcbddc->benign_change_explicit) { 2288 PetscInt i; 2289 2290 for (i=0;i<pcbddc->benign_n;i++) { 2291 PetscScalar *B0_vals; 2292 PetscInt *B0_cols,B0_ncol; 2293 2294 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2295 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2296 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2297 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2298 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2299 } 2300 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2301 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2302 } else { 2303 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2304 } 2305 } 2306 PetscFunctionReturn(0); 2307 } 2308 2309 #undef __FUNCT__ 2310 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2311 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2312 { 2313 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2314 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2315 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2316 PetscBLASInt *B_iwork,*B_ifail; 2317 PetscScalar *work,lwork; 2318 PetscScalar *St,*S,*eigv; 2319 PetscScalar *Sarray,*Starray; 2320 PetscReal *eigs,thresh; 2321 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2322 PetscBool allocated_S_St; 2323 #if defined(PETSC_USE_COMPLEX) 2324 PetscReal *rwork; 2325 #endif 2326 PetscErrorCode ierr; 2327 2328 PetscFunctionBegin; 2329 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2330 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2331 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2332 2333 if (pcbddc->dbg_flag) { 2334 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2335 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2336 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2337 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2338 } 2339 2340 if (pcbddc->dbg_flag) { 2341 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2342 } 2343 2344 /* max size of subsets */ 2345 mss = 0; 2346 for (i=0;i<sub_schurs->n_subs;i++) { 2347 PetscInt subset_size; 2348 2349 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2350 mss = PetscMax(mss,subset_size); 2351 } 2352 2353 /* min/max and threshold */ 2354 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2355 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2356 nmax = PetscMax(nmin,nmax); 2357 allocated_S_St = PETSC_FALSE; 2358 if (nmin) { 2359 allocated_S_St = PETSC_TRUE; 2360 } 2361 2362 /* allocate lapack workspace */ 2363 cum = cum2 = 0; 2364 maxneigs = 0; 2365 for (i=0;i<sub_schurs->n_subs;i++) { 2366 PetscInt n,subset_size; 2367 2368 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2369 n = PetscMin(subset_size,nmax); 2370 cum += subset_size; 2371 cum2 += subset_size*n; 2372 maxneigs = PetscMax(maxneigs,n); 2373 } 2374 if (mss) { 2375 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2376 PetscBLASInt B_itype = 1; 2377 PetscBLASInt B_N = mss; 2378 PetscReal zero = 0.0; 2379 PetscReal eps = 0.0; /* dlamch? */ 2380 2381 B_lwork = -1; 2382 S = NULL; 2383 St = NULL; 2384 eigs = NULL; 2385 eigv = NULL; 2386 B_iwork = NULL; 2387 B_ifail = NULL; 2388 #if defined(PETSC_USE_COMPLEX) 2389 rwork = NULL; 2390 #endif 2391 thresh = 1.0; 2392 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2393 #if defined(PETSC_USE_COMPLEX) 2394 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2395 #else 2396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2397 #endif 2398 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2399 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2400 } else { 2401 /* TODO */ 2402 } 2403 } else { 2404 lwork = 0; 2405 } 2406 2407 nv = 0; 2408 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2409 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2410 } 2411 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2412 if (allocated_S_St) { 2413 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2414 } 2415 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2416 #if defined(PETSC_USE_COMPLEX) 2417 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2418 #endif 2419 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2420 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2421 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2422 nv+cum,&pcbddc->adaptive_constraints_idxs, 2423 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2424 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2425 2426 maxneigs = 0; 2427 cum = cumarray = 0; 2428 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2429 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2430 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2431 const PetscInt *idxs; 2432 2433 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2434 for (cum=0;cum<nv;cum++) { 2435 pcbddc->adaptive_constraints_n[cum] = 1; 2436 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2437 pcbddc->adaptive_constraints_data[cum] = 1.0; 2438 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2439 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2440 } 2441 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2442 } 2443 2444 if (mss) { /* multilevel */ 2445 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2446 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2447 } 2448 2449 thresh = pcbddc->adaptive_threshold; 2450 for (i=0;i<sub_schurs->n_subs;i++) { 2451 const PetscInt *idxs; 2452 PetscReal upper,lower; 2453 PetscInt j,subset_size,eigs_start = 0; 2454 PetscBLASInt B_N; 2455 PetscBool same_data = PETSC_FALSE; 2456 2457 if (pcbddc->use_deluxe_scaling) { 2458 upper = PETSC_MAX_REAL; 2459 lower = thresh; 2460 } else { 2461 upper = 1./thresh; 2462 lower = 0.; 2463 } 2464 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2465 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2466 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2467 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2468 if (sub_schurs->is_hermitian) { 2469 PetscInt j,k; 2470 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2471 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2472 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2473 } 2474 for (j=0;j<subset_size;j++) { 2475 for (k=j;k<subset_size;k++) { 2476 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2477 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2478 } 2479 } 2480 } else { 2481 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2482 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2483 } 2484 } else { 2485 S = Sarray + cumarray; 2486 St = Starray + cumarray; 2487 } 2488 /* see if we can save some work */ 2489 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2490 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2491 } 2492 2493 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2494 B_neigs = 0; 2495 } else { 2496 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2497 PetscBLASInt B_itype = 1; 2498 PetscBLASInt B_IL, B_IU; 2499 PetscReal eps = -1.0; /* dlamch? */ 2500 PetscInt nmin_s; 2501 PetscBool compute_range = PETSC_FALSE; 2502 2503 if (pcbddc->dbg_flag) { 2504 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2505 } 2506 2507 compute_range = PETSC_FALSE; 2508 if (thresh > 1.+PETSC_SMALL && !same_data) { 2509 compute_range = PETSC_TRUE; 2510 } 2511 2512 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2513 if (compute_range) { 2514 2515 /* ask for eigenvalues larger than thresh */ 2516 #if defined(PETSC_USE_COMPLEX) 2517 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2518 #else 2519 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2520 #endif 2521 } else if (!same_data) { 2522 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2523 B_IL = 1; 2524 #if defined(PETSC_USE_COMPLEX) 2525 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2526 #else 2527 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2528 #endif 2529 } else { /* same_data is true, so get the adaptive function requested by the user */ 2530 PetscInt k; 2531 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2532 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2533 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2534 nmin = nmax; 2535 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2536 for (k=0;k<nmax;k++) { 2537 eigs[k] = 1./PETSC_SMALL; 2538 eigv[k*(subset_size+1)] = 1.0; 2539 } 2540 } 2541 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2542 if (B_ierr) { 2543 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2544 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2545 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2546 } 2547 2548 if (B_neigs > nmax) { 2549 if (pcbddc->dbg_flag) { 2550 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2551 } 2552 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2553 B_neigs = nmax; 2554 } 2555 2556 nmin_s = PetscMin(nmin,B_N); 2557 if (B_neigs < nmin_s) { 2558 PetscBLASInt B_neigs2; 2559 2560 if (pcbddc->use_deluxe_scaling) { 2561 B_IL = B_N - nmin_s + 1; 2562 B_IU = B_N - B_neigs; 2563 } else { 2564 B_IL = B_neigs + 1; 2565 B_IU = nmin_s; 2566 } 2567 if (pcbddc->dbg_flag) { 2568 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2569 } 2570 if (sub_schurs->is_hermitian) { 2571 PetscInt j,k; 2572 for (j=0;j<subset_size;j++) { 2573 for (k=j;k<subset_size;k++) { 2574 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2575 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2576 } 2577 } 2578 } else { 2579 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2580 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2581 } 2582 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2583 #if defined(PETSC_USE_COMPLEX) 2584 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2585 #else 2586 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2587 #endif 2588 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2589 B_neigs += B_neigs2; 2590 } 2591 if (B_ierr) { 2592 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2593 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2594 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2595 } 2596 if (pcbddc->dbg_flag) { 2597 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2598 for (j=0;j<B_neigs;j++) { 2599 if (eigs[j] == 0.0) { 2600 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2601 } else { 2602 if (pcbddc->use_deluxe_scaling) { 2603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2604 } else { 2605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2606 } 2607 } 2608 } 2609 } 2610 } else { 2611 /* TODO */ 2612 } 2613 } 2614 /* change the basis back to the original one */ 2615 if (sub_schurs->change) { 2616 Mat change,phi,phit; 2617 2618 if (pcbddc->dbg_flag > 1) { 2619 PetscInt ii; 2620 for (ii=0;ii<B_neigs;ii++) { 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2622 for (j=0;j<B_N;j++) { 2623 #if defined(PETSC_USE_COMPLEX) 2624 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2625 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2627 #else 2628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2629 #endif 2630 } 2631 } 2632 } 2633 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2634 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2635 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2636 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2637 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2638 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2639 } 2640 maxneigs = PetscMax(B_neigs,maxneigs); 2641 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2642 if (B_neigs) { 2643 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2644 2645 if (pcbddc->dbg_flag > 1) { 2646 PetscInt ii; 2647 for (ii=0;ii<B_neigs;ii++) { 2648 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2649 for (j=0;j<B_N;j++) { 2650 #if defined(PETSC_USE_COMPLEX) 2651 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2652 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2653 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2654 #else 2655 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2656 #endif 2657 } 2658 } 2659 } 2660 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2661 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2662 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2663 cum++; 2664 } 2665 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2666 /* shift for next computation */ 2667 cumarray += subset_size*subset_size; 2668 } 2669 if (pcbddc->dbg_flag) { 2670 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2671 } 2672 2673 if (mss) { 2674 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2675 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2676 /* destroy matrices (junk) */ 2677 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2678 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2679 } 2680 if (allocated_S_St) { 2681 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2682 } 2683 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2684 #if defined(PETSC_USE_COMPLEX) 2685 ierr = PetscFree(rwork);CHKERRQ(ierr); 2686 #endif 2687 if (pcbddc->dbg_flag) { 2688 PetscInt maxneigs_r; 2689 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2690 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2691 } 2692 PetscFunctionReturn(0); 2693 } 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "PCBDDCSetUpSolvers" 2697 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2698 { 2699 PetscScalar *coarse_submat_vals; 2700 PetscErrorCode ierr; 2701 2702 PetscFunctionBegin; 2703 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2704 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2705 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2706 2707 /* Setup local neumann solver ksp_R */ 2708 /* PCBDDCSetUpLocalScatters should be called first! */ 2709 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2710 2711 /* 2712 Setup local correction and local part of coarse basis. 2713 Gives back the dense local part of the coarse matrix in column major ordering 2714 */ 2715 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2716 2717 /* Compute total number of coarse nodes and setup coarse solver */ 2718 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2719 2720 /* free */ 2721 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2722 PetscFunctionReturn(0); 2723 } 2724 2725 #undef __FUNCT__ 2726 #define __FUNCT__ "PCBDDCResetCustomization" 2727 PetscErrorCode PCBDDCResetCustomization(PC pc) 2728 { 2729 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2730 PetscErrorCode ierr; 2731 2732 PetscFunctionBegin; 2733 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2734 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2735 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2736 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2737 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2738 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2739 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2740 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2741 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2742 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 2746 #undef __FUNCT__ 2747 #define __FUNCT__ "PCBDDCResetTopography" 2748 PetscErrorCode PCBDDCResetTopography(PC pc) 2749 { 2750 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2751 PetscInt i; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2756 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2757 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2758 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2759 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2760 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2761 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2762 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2763 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2764 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2765 for (i=0;i<pcbddc->n_local_subs;i++) { 2766 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2767 } 2768 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2769 if (pcbddc->sub_schurs) { 2770 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2771 } 2772 pcbddc->graphanalyzed = PETSC_FALSE; 2773 PetscFunctionReturn(0); 2774 } 2775 2776 #undef __FUNCT__ 2777 #define __FUNCT__ "PCBDDCResetSolvers" 2778 PetscErrorCode PCBDDCResetSolvers(PC pc) 2779 { 2780 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2781 PetscErrorCode ierr; 2782 2783 PetscFunctionBegin; 2784 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2785 if (pcbddc->coarse_phi_B) { 2786 PetscScalar *array; 2787 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2788 ierr = PetscFree(array);CHKERRQ(ierr); 2789 } 2790 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2791 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2792 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2793 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2794 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2795 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2796 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2797 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2798 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2799 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2800 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2801 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2802 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2803 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2804 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2805 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2806 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2807 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2808 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2809 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2810 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2811 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2812 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2813 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2814 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2815 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2816 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2817 if (pcbddc->benign_zerodiag_subs) { 2818 PetscInt i; 2819 for (i=0;i<pcbddc->benign_n;i++) { 2820 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2821 } 2822 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2823 } 2824 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828 #undef __FUNCT__ 2829 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2830 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2831 { 2832 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2833 PC_IS *pcis = (PC_IS*)pc->data; 2834 VecType impVecType; 2835 PetscInt n_constraints,n_R,old_size; 2836 PetscErrorCode ierr; 2837 2838 PetscFunctionBegin; 2839 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2840 /* get sizes */ 2841 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2842 n_R = pcis->n - pcbddc->n_vertices; 2843 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2844 /* local work vectors (try to avoid unneeded work)*/ 2845 /* R nodes */ 2846 old_size = -1; 2847 if (pcbddc->vec1_R) { 2848 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2849 } 2850 if (n_R != old_size) { 2851 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2852 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2853 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2854 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2855 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2856 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2857 } 2858 /* local primal dofs */ 2859 old_size = -1; 2860 if (pcbddc->vec1_P) { 2861 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2862 } 2863 if (pcbddc->local_primal_size != old_size) { 2864 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2865 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2866 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2867 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2868 } 2869 /* local explicit constraints */ 2870 old_size = -1; 2871 if (pcbddc->vec1_C) { 2872 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2873 } 2874 if (n_constraints && n_constraints != old_size) { 2875 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2876 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2877 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2878 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2879 } 2880 PetscFunctionReturn(0); 2881 } 2882 2883 #undef __FUNCT__ 2884 #define __FUNCT__ "PCBDDCSetUpCorrection" 2885 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2886 { 2887 PetscErrorCode ierr; 2888 /* pointers to pcis and pcbddc */ 2889 PC_IS* pcis = (PC_IS*)pc->data; 2890 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2891 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2892 /* submatrices of local problem */ 2893 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2894 /* submatrices of local coarse problem */ 2895 Mat S_VV,S_CV,S_VC,S_CC; 2896 /* working matrices */ 2897 Mat C_CR; 2898 /* additional working stuff */ 2899 PC pc_R; 2900 Mat F; 2901 Vec dummy_vec; 2902 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2903 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2904 PetscScalar *work; 2905 PetscInt *idx_V_B; 2906 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2907 PetscInt i,n_R,n_D,n_B; 2908 2909 /* some shortcuts to scalars */ 2910 PetscScalar one=1.0,m_one=-1.0; 2911 2912 PetscFunctionBegin; 2913 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2914 2915 /* Set Non-overlapping dimensions */ 2916 n_vertices = pcbddc->n_vertices; 2917 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2918 n_B = pcis->n_B; 2919 n_D = pcis->n - n_B; 2920 n_R = pcis->n - n_vertices; 2921 2922 /* vertices in boundary numbering */ 2923 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2924 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2925 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2926 2927 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2928 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2929 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2930 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2931 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2932 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2933 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2934 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2936 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2937 2938 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2939 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2940 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2941 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2942 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2943 lda_rhs = n_R; 2944 need_benign_correction = PETSC_FALSE; 2945 if (isLU || isILU || isCHOL) { 2946 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2947 } else if (sub_schurs && sub_schurs->reuse_solver) { 2948 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2949 MatFactorType type; 2950 2951 F = reuse_solver->F; 2952 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2953 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2954 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2955 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2956 } else { 2957 F = NULL; 2958 } 2959 2960 /* allocate workspace */ 2961 n = 0; 2962 if (n_constraints) { 2963 n += lda_rhs*n_constraints; 2964 } 2965 if (n_vertices) { 2966 n = PetscMax(2*lda_rhs*n_vertices,n); 2967 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2968 } 2969 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2970 2971 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2972 dummy_vec = NULL; 2973 if (need_benign_correction && lda_rhs != n_R && F) { 2974 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2975 } 2976 2977 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2978 if (n_constraints) { 2979 Mat M1,M2,M3,C_B; 2980 IS is_aux; 2981 PetscScalar *array,*array2; 2982 2983 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2984 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2985 2986 /* Extract constraints on R nodes: C_{CR} */ 2987 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2988 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2989 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2990 2991 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2992 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2993 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2994 for (i=0;i<n_constraints;i++) { 2995 const PetscScalar *row_cmat_values; 2996 const PetscInt *row_cmat_indices; 2997 PetscInt size_of_constraint,j; 2998 2999 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3000 for (j=0;j<size_of_constraint;j++) { 3001 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3002 } 3003 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3004 } 3005 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3006 if (F) { 3007 Mat B; 3008 3009 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3010 if (need_benign_correction) { 3011 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3012 3013 /* rhs is already zero on interior dofs, no need to change the rhs */ 3014 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3015 } 3016 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3017 if (need_benign_correction) { 3018 PetscScalar *marr; 3019 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3020 3021 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3022 if (lda_rhs != n_R) { 3023 for (i=0;i<n_constraints;i++) { 3024 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3025 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3026 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3027 } 3028 } else { 3029 for (i=0;i<n_constraints;i++) { 3030 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3031 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3032 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3033 } 3034 } 3035 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3036 } 3037 ierr = MatDestroy(&B);CHKERRQ(ierr); 3038 } else { 3039 PetscScalar *marr; 3040 3041 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3042 for (i=0;i<n_constraints;i++) { 3043 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3044 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3045 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3046 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3047 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3048 } 3049 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3050 } 3051 if (!pcbddc->switch_static) { 3052 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3053 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3054 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3055 for (i=0;i<n_constraints;i++) { 3056 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3057 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3058 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3059 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3060 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3061 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3062 } 3063 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3064 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3065 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3066 } else { 3067 if (lda_rhs != n_R) { 3068 IS dummy; 3069 3070 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3071 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3072 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3073 } else { 3074 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3075 pcbddc->local_auxmat2 = local_auxmat2_R; 3076 } 3077 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3078 } 3079 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3080 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3081 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3082 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3083 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3084 if (isCHOL) { 3085 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3086 } else { 3087 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3088 } 3089 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3090 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3091 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3092 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3093 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3094 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3095 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3096 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3097 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3098 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3099 } 3100 3101 /* Get submatrices from subdomain matrix */ 3102 if (n_vertices) { 3103 IS is_aux; 3104 3105 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3106 IS tis; 3107 3108 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3109 ierr = ISSort(tis);CHKERRQ(ierr); 3110 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3111 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3112 } else { 3113 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3114 } 3115 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3116 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3117 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3118 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3119 } 3120 3121 /* Matrix of coarse basis functions (local) */ 3122 if (pcbddc->coarse_phi_B) { 3123 PetscInt on_B,on_primal,on_D=n_D; 3124 if (pcbddc->coarse_phi_D) { 3125 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3126 } 3127 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3128 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3129 PetscScalar *marray; 3130 3131 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3132 ierr = PetscFree(marray);CHKERRQ(ierr); 3133 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3134 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3135 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3136 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3137 } 3138 } 3139 3140 if (!pcbddc->coarse_phi_B) { 3141 PetscScalar *marray; 3142 3143 n = n_B*pcbddc->local_primal_size; 3144 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3145 n += n_D*pcbddc->local_primal_size; 3146 } 3147 if (!pcbddc->symmetric_primal) { 3148 n *= 2; 3149 } 3150 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3151 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3152 n = n_B*pcbddc->local_primal_size; 3153 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3154 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3155 n += n_D*pcbddc->local_primal_size; 3156 } 3157 if (!pcbddc->symmetric_primal) { 3158 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3159 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3160 n = n_B*pcbddc->local_primal_size; 3161 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3162 } 3163 } else { 3164 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3165 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3166 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3167 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3168 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3169 } 3170 } 3171 } 3172 3173 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3174 p0_lidx_I = NULL; 3175 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3176 const PetscInt *idxs; 3177 3178 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3179 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3180 for (i=0;i<pcbddc->benign_n;i++) { 3181 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3182 } 3183 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3184 } 3185 3186 /* vertices */ 3187 if (n_vertices) { 3188 3189 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3190 3191 if (n_R) { 3192 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3193 PetscBLASInt B_N,B_one = 1; 3194 PetscScalar *x,*y; 3195 PetscBool isseqaij; 3196 3197 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3198 if (need_benign_correction) { 3199 ISLocalToGlobalMapping RtoN; 3200 IS is_p0; 3201 PetscInt *idxs_p0,n; 3202 3203 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3204 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3205 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3206 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3207 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3208 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3209 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3210 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3211 } 3212 3213 if (lda_rhs == n_R) { 3214 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3215 } else { 3216 PetscScalar *av,*array; 3217 const PetscInt *xadj,*adjncy; 3218 PetscInt n; 3219 PetscBool flg_row; 3220 3221 array = work+lda_rhs*n_vertices; 3222 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3223 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3224 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3225 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3226 for (i=0;i<n;i++) { 3227 PetscInt j; 3228 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3229 } 3230 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3231 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3232 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3233 } 3234 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3235 if (need_benign_correction) { 3236 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3237 PetscScalar *marr; 3238 3239 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3240 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3241 3242 | 0 0 0 | (V) 3243 L = | 0 0 -1 | (P-p0) 3244 | 0 0 -1 | (p0) 3245 3246 */ 3247 for (i=0;i<reuse_solver->benign_n;i++) { 3248 const PetscScalar *vals; 3249 const PetscInt *idxs,*idxs_zero; 3250 PetscInt n,j,nz; 3251 3252 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3253 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3254 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3255 for (j=0;j<n;j++) { 3256 PetscScalar val = vals[j]; 3257 PetscInt k,col = idxs[j]; 3258 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3259 } 3260 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3261 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3262 } 3263 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3264 } 3265 if (F) { 3266 /* need to correct the rhs */ 3267 if (need_benign_correction) { 3268 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3269 PetscScalar *marr; 3270 3271 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3272 if (lda_rhs != n_R) { 3273 for (i=0;i<n_vertices;i++) { 3274 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3275 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3276 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3277 } 3278 } else { 3279 for (i=0;i<n_vertices;i++) { 3280 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3281 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3282 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3283 } 3284 } 3285 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3286 } 3287 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3288 /* need to correct the solution */ 3289 if (need_benign_correction) { 3290 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3291 PetscScalar *marr; 3292 3293 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3294 if (lda_rhs != n_R) { 3295 for (i=0;i<n_vertices;i++) { 3296 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3297 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3298 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3299 } 3300 } else { 3301 for (i=0;i<n_vertices;i++) { 3302 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3303 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3304 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3305 } 3306 } 3307 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3308 } 3309 } else { 3310 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3311 for (i=0;i<n_vertices;i++) { 3312 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3313 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3314 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3315 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3316 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3317 } 3318 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3319 } 3320 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3321 /* S_VV and S_CV */ 3322 if (n_constraints) { 3323 Mat B; 3324 3325 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3326 for (i=0;i<n_vertices;i++) { 3327 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3328 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3329 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3330 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3331 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3332 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3333 } 3334 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3335 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3336 ierr = MatDestroy(&B);CHKERRQ(ierr); 3337 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3338 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3339 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3340 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3341 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3342 ierr = MatDestroy(&B);CHKERRQ(ierr); 3343 } 3344 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3345 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3346 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3347 } 3348 if (lda_rhs != n_R) { 3349 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3350 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3351 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3352 } 3353 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3354 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3355 if (need_benign_correction) { 3356 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3357 PetscScalar *marr,*sums; 3358 3359 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3360 ierr = MatDenseGetArray(S_VVt,&marr); 3361 for (i=0;i<reuse_solver->benign_n;i++) { 3362 const PetscScalar *vals; 3363 const PetscInt *idxs,*idxs_zero; 3364 PetscInt n,j,nz; 3365 3366 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3367 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3368 for (j=0;j<n_vertices;j++) { 3369 PetscInt k; 3370 sums[j] = 0.; 3371 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3372 } 3373 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3374 for (j=0;j<n;j++) { 3375 PetscScalar val = vals[j]; 3376 PetscInt k; 3377 for (k=0;k<n_vertices;k++) { 3378 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3379 } 3380 } 3381 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3382 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3383 } 3384 ierr = PetscFree(sums);CHKERRQ(ierr); 3385 ierr = MatDenseRestoreArray(S_VVt,&marr); 3386 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3387 } 3388 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3389 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3390 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3391 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3392 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3393 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3394 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3395 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3396 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3397 } else { 3398 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3399 } 3400 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3401 3402 /* coarse basis functions */ 3403 for (i=0;i<n_vertices;i++) { 3404 PetscScalar *y; 3405 3406 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3407 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3408 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3409 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3410 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3411 y[n_B*i+idx_V_B[i]] = 1.0; 3412 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3413 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3414 3415 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3416 PetscInt j; 3417 3418 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3419 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3420 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3421 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3422 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3423 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3424 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3425 } 3426 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3427 } 3428 /* if n_R == 0 the object is not destroyed */ 3429 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3430 } 3431 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3432 3433 if (n_constraints) { 3434 Mat B; 3435 3436 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3437 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3438 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3439 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3440 if (n_vertices) { 3441 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3442 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3443 } else { 3444 Mat S_VCt; 3445 3446 if (lda_rhs != n_R) { 3447 ierr = MatDestroy(&B);CHKERRQ(ierr); 3448 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3449 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3450 } 3451 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3452 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3453 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3454 } 3455 } 3456 ierr = MatDestroy(&B);CHKERRQ(ierr); 3457 /* coarse basis functions */ 3458 for (i=0;i<n_constraints;i++) { 3459 PetscScalar *y; 3460 3461 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3462 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3463 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3464 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3465 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3466 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3467 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3468 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3469 PetscInt j; 3470 3471 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3472 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3473 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3474 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3475 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3476 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3477 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3478 } 3479 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3480 } 3481 } 3482 if (n_constraints) { 3483 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3484 } 3485 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3486 3487 /* coarse matrix entries relative to B_0 */ 3488 if (pcbddc->benign_n) { 3489 Mat B0_B,B0_BPHI; 3490 IS is_dummy; 3491 PetscScalar *data; 3492 PetscInt j; 3493 3494 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3495 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3496 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3497 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3498 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3499 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3500 for (j=0;j<pcbddc->benign_n;j++) { 3501 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3502 for (i=0;i<pcbddc->local_primal_size;i++) { 3503 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3504 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3505 } 3506 } 3507 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3508 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3509 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3510 } 3511 3512 /* compute other basis functions for non-symmetric problems */ 3513 if (!pcbddc->symmetric_primal) { 3514 Mat B_V=NULL,B_C=NULL; 3515 PetscScalar *marray; 3516 3517 if (n_constraints) { 3518 Mat S_CCT,C_CRT; 3519 3520 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3521 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3522 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3523 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3524 if (n_vertices) { 3525 Mat S_VCT; 3526 3527 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3528 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3529 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3530 } 3531 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3532 } else { 3533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3534 } 3535 if (n_vertices && n_R) { 3536 PetscScalar *av,*marray; 3537 const PetscInt *xadj,*adjncy; 3538 PetscInt n; 3539 PetscBool flg_row; 3540 3541 /* B_V = B_V - A_VR^T */ 3542 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3543 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3544 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3545 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3546 for (i=0;i<n;i++) { 3547 PetscInt j; 3548 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3549 } 3550 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3551 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3552 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3553 } 3554 3555 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3556 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3557 for (i=0;i<n_vertices;i++) { 3558 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3559 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3560 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3561 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3562 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3563 } 3564 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3565 if (B_C) { 3566 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3567 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3568 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3569 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3570 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3571 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3572 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3573 } 3574 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3575 } 3576 /* coarse basis functions */ 3577 for (i=0;i<pcbddc->local_primal_size;i++) { 3578 PetscScalar *y; 3579 3580 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3581 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3582 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3583 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3584 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3585 if (i<n_vertices) { 3586 y[n_B*i+idx_V_B[i]] = 1.0; 3587 } 3588 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3589 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3590 3591 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3592 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3593 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3594 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3595 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3596 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3597 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3598 } 3599 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3600 } 3601 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3602 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3603 } 3604 /* free memory */ 3605 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3606 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3607 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3608 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3609 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3610 ierr = PetscFree(work);CHKERRQ(ierr); 3611 if (n_vertices) { 3612 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3613 } 3614 if (n_constraints) { 3615 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3616 } 3617 /* Checking coarse_sub_mat and coarse basis functios */ 3618 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3619 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3620 if (pcbddc->dbg_flag) { 3621 Mat coarse_sub_mat; 3622 Mat AUXMAT,TM1,TM2,TM3,TM4; 3623 Mat coarse_phi_D,coarse_phi_B; 3624 Mat coarse_psi_D,coarse_psi_B; 3625 Mat A_II,A_BB,A_IB,A_BI; 3626 Mat C_B,CPHI; 3627 IS is_dummy; 3628 Vec mones; 3629 MatType checkmattype=MATSEQAIJ; 3630 PetscReal real_value; 3631 3632 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3633 Mat A; 3634 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3635 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3636 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3637 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3638 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3639 ierr = MatDestroy(&A);CHKERRQ(ierr); 3640 } else { 3641 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3642 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3643 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3644 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3645 } 3646 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3647 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3648 if (!pcbddc->symmetric_primal) { 3649 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3650 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3651 } 3652 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3653 3654 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3655 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3656 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3657 if (!pcbddc->symmetric_primal) { 3658 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3659 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3660 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3661 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3662 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3664 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3665 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3666 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3667 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3668 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3669 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3670 } else { 3671 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3672 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3673 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3674 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3675 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3676 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3677 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3678 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3679 } 3680 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3681 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3682 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3683 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3684 if (pcbddc->benign_n) { 3685 Mat B0_B,B0_BPHI; 3686 PetscScalar *data,*data2; 3687 PetscInt j; 3688 3689 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3690 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3691 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3692 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3693 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3694 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3695 for (j=0;j<pcbddc->benign_n;j++) { 3696 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3697 for (i=0;i<pcbddc->local_primal_size;i++) { 3698 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3699 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3700 } 3701 } 3702 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3703 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3704 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3705 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3706 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3707 } 3708 #if 0 3709 { 3710 PetscViewer viewer; 3711 char filename[256]; 3712 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3713 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3714 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3715 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3716 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3717 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3718 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3719 if (save_change) { 3720 Mat phi_B; 3721 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3722 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3723 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3724 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3725 } else { 3726 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3727 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3728 } 3729 if (pcbddc->coarse_phi_D) { 3730 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3731 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3732 } 3733 if (pcbddc->coarse_psi_B) { 3734 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3735 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3736 } 3737 if (pcbddc->coarse_psi_D) { 3738 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3739 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3740 } 3741 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3742 } 3743 #endif 3744 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3745 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3746 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3747 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3748 3749 /* check constraints */ 3750 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3751 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3752 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3753 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3754 } else { 3755 PetscScalar *data; 3756 Mat tmat; 3757 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3758 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3759 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3760 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3761 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3762 } 3763 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3764 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3765 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3766 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3767 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3768 if (!pcbddc->symmetric_primal) { 3769 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3770 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3771 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3772 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3773 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3774 } 3775 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3776 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3777 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3778 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3779 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3780 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3781 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3782 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3783 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3784 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3785 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3786 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3787 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3788 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3789 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3790 if (!pcbddc->symmetric_primal) { 3791 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3792 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3793 } 3794 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3795 } 3796 /* get back data */ 3797 *coarse_submat_vals_n = coarse_submat_vals; 3798 PetscFunctionReturn(0); 3799 } 3800 3801 #undef __FUNCT__ 3802 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3803 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3804 { 3805 Mat *work_mat; 3806 IS isrow_s,iscol_s; 3807 PetscBool rsorted,csorted; 3808 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3809 PetscErrorCode ierr; 3810 3811 PetscFunctionBegin; 3812 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3813 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3814 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3815 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3816 3817 if (!rsorted) { 3818 const PetscInt *idxs; 3819 PetscInt *idxs_sorted,i; 3820 3821 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3822 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3823 for (i=0;i<rsize;i++) { 3824 idxs_perm_r[i] = i; 3825 } 3826 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3827 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3828 for (i=0;i<rsize;i++) { 3829 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3830 } 3831 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3832 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3833 } else { 3834 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3835 isrow_s = isrow; 3836 } 3837 3838 if (!csorted) { 3839 if (isrow == iscol) { 3840 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3841 iscol_s = isrow_s; 3842 } else { 3843 const PetscInt *idxs; 3844 PetscInt *idxs_sorted,i; 3845 3846 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3847 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3848 for (i=0;i<csize;i++) { 3849 idxs_perm_c[i] = i; 3850 } 3851 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3852 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3853 for (i=0;i<csize;i++) { 3854 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3855 } 3856 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3857 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3858 } 3859 } else { 3860 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3861 iscol_s = iscol; 3862 } 3863 3864 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3865 3866 if (!rsorted || !csorted) { 3867 Mat new_mat; 3868 IS is_perm_r,is_perm_c; 3869 3870 if (!rsorted) { 3871 PetscInt *idxs_r,i; 3872 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3873 for (i=0;i<rsize;i++) { 3874 idxs_r[idxs_perm_r[i]] = i; 3875 } 3876 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3877 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3878 } else { 3879 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3880 } 3881 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3882 3883 if (!csorted) { 3884 if (isrow_s == iscol_s) { 3885 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3886 is_perm_c = is_perm_r; 3887 } else { 3888 PetscInt *idxs_c,i; 3889 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3890 for (i=0;i<csize;i++) { 3891 idxs_c[idxs_perm_c[i]] = i; 3892 } 3893 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3894 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3895 } 3896 } else { 3897 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3898 } 3899 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3900 3901 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3902 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3903 work_mat[0] = new_mat; 3904 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3905 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3906 } 3907 3908 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3909 *B = work_mat[0]; 3910 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3911 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3912 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3913 PetscFunctionReturn(0); 3914 } 3915 3916 #undef __FUNCT__ 3917 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3918 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3919 { 3920 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3921 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3922 Mat new_mat; 3923 IS is_local,is_global; 3924 PetscInt local_size; 3925 PetscBool isseqaij; 3926 PetscErrorCode ierr; 3927 3928 PetscFunctionBegin; 3929 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3930 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3931 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3932 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3933 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3934 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3935 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3936 3937 /* check */ 3938 if (pcbddc->dbg_flag) { 3939 Vec x,x_change; 3940 PetscReal error; 3941 3942 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3943 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3944 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3945 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3946 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3947 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3948 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3949 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3950 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3951 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3952 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3954 ierr = VecDestroy(&x);CHKERRQ(ierr); 3955 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3956 } 3957 3958 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3959 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3960 if (isseqaij) { 3961 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3962 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3963 } else { 3964 Mat work_mat; 3965 3966 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3967 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3968 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3969 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3970 } 3971 if (matis->A->symmetric_set) { 3972 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3973 #if !defined(PETSC_USE_COMPLEX) 3974 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3975 #endif 3976 } 3977 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3978 PetscFunctionReturn(0); 3979 } 3980 3981 #undef __FUNCT__ 3982 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3983 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3984 { 3985 PC_IS* pcis = (PC_IS*)(pc->data); 3986 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3987 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3988 PetscInt *idx_R_local=NULL; 3989 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3990 PetscInt vbs,bs; 3991 PetscBT bitmask=NULL; 3992 PetscErrorCode ierr; 3993 3994 PetscFunctionBegin; 3995 /* 3996 No need to setup local scatters if 3997 - primal space is unchanged 3998 AND 3999 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4000 AND 4001 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4002 */ 4003 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4004 PetscFunctionReturn(0); 4005 } 4006 /* destroy old objects */ 4007 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4008 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4009 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4010 /* Set Non-overlapping dimensions */ 4011 n_B = pcis->n_B; 4012 n_D = pcis->n - n_B; 4013 n_vertices = pcbddc->n_vertices; 4014 4015 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4016 4017 /* create auxiliary bitmask and allocate workspace */ 4018 if (!sub_schurs || !sub_schurs->reuse_solver) { 4019 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4020 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4021 for (i=0;i<n_vertices;i++) { 4022 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4023 } 4024 4025 for (i=0, n_R=0; i<pcis->n; i++) { 4026 if (!PetscBTLookup(bitmask,i)) { 4027 idx_R_local[n_R++] = i; 4028 } 4029 } 4030 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4031 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4032 4033 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4034 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4035 } 4036 4037 /* Block code */ 4038 vbs = 1; 4039 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4040 if (bs>1 && !(n_vertices%bs)) { 4041 PetscBool is_blocked = PETSC_TRUE; 4042 PetscInt *vary; 4043 if (!sub_schurs || !sub_schurs->reuse_solver) { 4044 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4045 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4046 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4047 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4048 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4049 for (i=0; i<pcis->n/bs; i++) { 4050 if (vary[i]!=0 && vary[i]!=bs) { 4051 is_blocked = PETSC_FALSE; 4052 break; 4053 } 4054 } 4055 ierr = PetscFree(vary);CHKERRQ(ierr); 4056 } else { 4057 /* Verify directly the R set */ 4058 for (i=0; i<n_R/bs; i++) { 4059 PetscInt j,node=idx_R_local[bs*i]; 4060 for (j=1; j<bs; j++) { 4061 if (node != idx_R_local[bs*i+j]-j) { 4062 is_blocked = PETSC_FALSE; 4063 break; 4064 } 4065 } 4066 } 4067 } 4068 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4069 vbs = bs; 4070 for (i=0;i<n_R/vbs;i++) { 4071 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4072 } 4073 } 4074 } 4075 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4076 if (sub_schurs && sub_schurs->reuse_solver) { 4077 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4078 4079 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4080 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4081 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4082 reuse_solver->is_R = pcbddc->is_R_local; 4083 } else { 4084 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4085 } 4086 4087 /* print some info if requested */ 4088 if (pcbddc->dbg_flag) { 4089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4090 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4091 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4092 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4093 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4095 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4096 } 4097 4098 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4099 if (!sub_schurs || !sub_schurs->reuse_solver) { 4100 IS is_aux1,is_aux2; 4101 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4102 4103 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4104 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4105 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4106 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4107 for (i=0; i<n_D; i++) { 4108 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4109 } 4110 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4111 for (i=0, j=0; i<n_R; i++) { 4112 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4113 aux_array1[j++] = i; 4114 } 4115 } 4116 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4117 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4118 for (i=0, j=0; i<n_B; i++) { 4119 if (!PetscBTLookup(bitmask,is_indices[i])) { 4120 aux_array2[j++] = i; 4121 } 4122 } 4123 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4124 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4125 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4126 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4127 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4128 4129 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4130 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4131 for (i=0, j=0; i<n_R; i++) { 4132 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4133 aux_array1[j++] = i; 4134 } 4135 } 4136 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4137 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4138 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4139 } 4140 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4141 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4142 } else { 4143 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4144 IS tis; 4145 PetscInt schur_size; 4146 4147 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4148 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4149 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4150 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4151 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4152 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4153 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4154 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4155 } 4156 } 4157 PetscFunctionReturn(0); 4158 } 4159 4160 4161 #undef __FUNCT__ 4162 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4163 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4164 { 4165 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4166 PC_IS *pcis = (PC_IS*)pc->data; 4167 PC pc_temp; 4168 Mat A_RR; 4169 MatReuse reuse; 4170 PetscScalar m_one = -1.0; 4171 PetscReal value; 4172 PetscInt n_D,n_R; 4173 PetscBool check_corr[2],issbaij; 4174 PetscErrorCode ierr; 4175 /* prefixes stuff */ 4176 char dir_prefix[256],neu_prefix[256],str_level[16]; 4177 size_t len; 4178 4179 PetscFunctionBegin; 4180 4181 /* compute prefixes */ 4182 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4183 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4184 if (!pcbddc->current_level) { 4185 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4186 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4187 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4188 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4189 } else { 4190 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4191 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4192 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4193 len -= 15; /* remove "pc_bddc_coarse_" */ 4194 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4195 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4196 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4197 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4198 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4199 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4200 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4201 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4202 } 4203 4204 /* DIRICHLET PROBLEM */ 4205 if (dirichlet) { 4206 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4207 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4208 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4209 if (pcbddc->dbg_flag) { 4210 Mat A_IIn; 4211 4212 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4213 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4214 pcis->A_II = A_IIn; 4215 } 4216 } 4217 if (pcbddc->local_mat->symmetric_set) { 4218 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4219 } 4220 /* Matrix for Dirichlet problem is pcis->A_II */ 4221 n_D = pcis->n - pcis->n_B; 4222 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4223 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4224 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4225 /* default */ 4226 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4227 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4228 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4229 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4230 if (issbaij) { 4231 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4232 } else { 4233 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4234 } 4235 /* Allow user's customization */ 4236 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4237 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4238 } 4239 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4240 if (sub_schurs && sub_schurs->reuse_solver) { 4241 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4242 4243 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4244 } 4245 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4246 if (!n_D) { 4247 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4248 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4249 } 4250 /* Set Up KSP for Dirichlet problem of BDDC */ 4251 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4252 /* set ksp_D into pcis data */ 4253 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4254 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4255 pcis->ksp_D = pcbddc->ksp_D; 4256 } 4257 4258 /* NEUMANN PROBLEM */ 4259 A_RR = 0; 4260 if (neumann) { 4261 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4262 PetscInt ibs,mbs; 4263 PetscBool issbaij; 4264 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4265 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4266 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4267 if (pcbddc->ksp_R) { /* already created ksp */ 4268 PetscInt nn_R; 4269 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4270 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4271 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4272 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4273 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4274 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4275 reuse = MAT_INITIAL_MATRIX; 4276 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4277 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4278 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4279 reuse = MAT_INITIAL_MATRIX; 4280 } else { /* safe to reuse the matrix */ 4281 reuse = MAT_REUSE_MATRIX; 4282 } 4283 } 4284 /* last check */ 4285 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4286 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4287 reuse = MAT_INITIAL_MATRIX; 4288 } 4289 } else { /* first time, so we need to create the matrix */ 4290 reuse = MAT_INITIAL_MATRIX; 4291 } 4292 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4293 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4294 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4295 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4296 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4297 if (matis->A == pcbddc->local_mat) { 4298 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4299 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4300 } else { 4301 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4302 } 4303 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4304 if (matis->A == pcbddc->local_mat) { 4305 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4306 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4307 } else { 4308 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4309 } 4310 } 4311 /* extract A_RR */ 4312 if (sub_schurs && sub_schurs->reuse_solver) { 4313 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4314 4315 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4316 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4317 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4318 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4319 } else { 4320 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4321 } 4322 } else { 4323 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4324 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4325 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4326 } 4327 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4328 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4329 } 4330 if (pcbddc->local_mat->symmetric_set) { 4331 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4332 } 4333 if (!pcbddc->ksp_R) { /* create object if not present */ 4334 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4335 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4336 /* default */ 4337 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4338 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4339 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4340 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4341 if (issbaij) { 4342 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4343 } else { 4344 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4345 } 4346 /* Allow user's customization */ 4347 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4348 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4349 } 4350 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4351 if (!n_R) { 4352 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4353 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4354 } 4355 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4356 /* Reuse solver if it is present */ 4357 if (sub_schurs && sub_schurs->reuse_solver) { 4358 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4359 4360 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4361 } 4362 /* Set Up KSP for Neumann problem of BDDC */ 4363 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4364 } 4365 4366 if (pcbddc->dbg_flag) { 4367 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4368 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4369 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4370 } 4371 4372 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4373 check_corr[0] = check_corr[1] = PETSC_FALSE; 4374 if (pcbddc->NullSpace_corr[0]) { 4375 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4376 } 4377 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4378 check_corr[0] = PETSC_TRUE; 4379 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4380 } 4381 if (neumann && pcbddc->NullSpace_corr[2]) { 4382 check_corr[1] = PETSC_TRUE; 4383 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4384 } 4385 4386 /* check Dirichlet and Neumann solvers */ 4387 if (pcbddc->dbg_flag) { 4388 if (dirichlet) { /* Dirichlet */ 4389 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4390 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4391 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4392 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4393 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4394 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4395 if (check_corr[0]) { 4396 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4397 } 4398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4399 } 4400 if (neumann) { /* Neumann */ 4401 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4402 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4403 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4404 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4405 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4406 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4407 if (check_corr[1]) { 4408 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4409 } 4410 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4411 } 4412 } 4413 /* free Neumann problem's matrix */ 4414 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4415 PetscFunctionReturn(0); 4416 } 4417 4418 #undef __FUNCT__ 4419 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4420 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4421 { 4422 PetscErrorCode ierr; 4423 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4424 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4425 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4426 4427 PetscFunctionBegin; 4428 if (!reuse_solver) { 4429 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4430 } 4431 if (!pcbddc->switch_static) { 4432 if (applytranspose && pcbddc->local_auxmat1) { 4433 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4434 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4435 } 4436 if (!reuse_solver) { 4437 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4438 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4439 } else { 4440 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4441 4442 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4443 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4444 } 4445 } else { 4446 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4447 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4448 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4449 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4450 if (applytranspose && pcbddc->local_auxmat1) { 4451 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4452 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4453 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4454 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4455 } 4456 } 4457 if (!reuse_solver || pcbddc->switch_static) { 4458 if (applytranspose) { 4459 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4460 } else { 4461 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4462 } 4463 } else { 4464 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4465 4466 if (applytranspose) { 4467 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4468 } else { 4469 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4470 } 4471 } 4472 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4473 if (!pcbddc->switch_static) { 4474 if (!reuse_solver) { 4475 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4476 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4477 } else { 4478 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4479 4480 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4481 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4482 } 4483 if (!applytranspose && pcbddc->local_auxmat1) { 4484 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4485 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4486 } 4487 } else { 4488 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4489 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4490 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4491 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4492 if (!applytranspose && pcbddc->local_auxmat1) { 4493 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4494 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4495 } 4496 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4497 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4498 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4499 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4500 } 4501 PetscFunctionReturn(0); 4502 } 4503 4504 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4505 #undef __FUNCT__ 4506 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4507 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4508 { 4509 PetscErrorCode ierr; 4510 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4511 PC_IS* pcis = (PC_IS*) (pc->data); 4512 const PetscScalar zero = 0.0; 4513 4514 PetscFunctionBegin; 4515 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4516 if (!pcbddc->benign_apply_coarse_only) { 4517 if (applytranspose) { 4518 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4519 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4520 } else { 4521 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4522 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4523 } 4524 } else { 4525 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4526 } 4527 4528 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4529 if (pcbddc->benign_n) { 4530 PetscScalar *array; 4531 PetscInt j; 4532 4533 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4534 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4535 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4536 } 4537 4538 /* start communications from local primal nodes to rhs of coarse solver */ 4539 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4540 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4541 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4542 4543 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4544 if (pcbddc->coarse_ksp) { 4545 Mat coarse_mat; 4546 Vec rhs,sol; 4547 MatNullSpace nullsp; 4548 PetscBool isbddc = PETSC_FALSE; 4549 4550 if (pcbddc->benign_have_null) { 4551 PC coarse_pc; 4552 4553 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4554 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4555 /* we need to propagate to coarser levels the need for a possible benign correction */ 4556 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4557 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4558 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4559 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4560 } 4561 } 4562 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4563 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4564 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4565 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4566 if (nullsp) { 4567 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4568 } 4569 if (applytranspose) { 4570 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4571 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4572 } else { 4573 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4574 PC coarse_pc; 4575 4576 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4577 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4578 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4579 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4580 } else { 4581 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4582 } 4583 } 4584 /* we don't need the benign correction at coarser levels anymore */ 4585 if (pcbddc->benign_have_null && isbddc) { 4586 PC coarse_pc; 4587 PC_BDDC* coarsepcbddc; 4588 4589 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4590 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4591 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4592 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4593 } 4594 if (nullsp) { 4595 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4596 } 4597 } 4598 4599 /* Local solution on R nodes */ 4600 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4601 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4602 } 4603 /* communications from coarse sol to local primal nodes */ 4604 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4605 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4606 4607 /* Sum contributions from the two levels */ 4608 if (!pcbddc->benign_apply_coarse_only) { 4609 if (applytranspose) { 4610 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4611 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4612 } else { 4613 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4614 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4615 } 4616 /* store p0 */ 4617 if (pcbddc->benign_n) { 4618 PetscScalar *array; 4619 PetscInt j; 4620 4621 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4622 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4623 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4624 } 4625 } else { /* expand the coarse solution */ 4626 if (applytranspose) { 4627 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4628 } else { 4629 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4630 } 4631 } 4632 PetscFunctionReturn(0); 4633 } 4634 4635 #undef __FUNCT__ 4636 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4637 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4638 { 4639 PetscErrorCode ierr; 4640 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4641 PetscScalar *array; 4642 Vec from,to; 4643 4644 PetscFunctionBegin; 4645 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4646 from = pcbddc->coarse_vec; 4647 to = pcbddc->vec1_P; 4648 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4649 Vec tvec; 4650 4651 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4652 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4653 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4654 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4655 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4656 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4657 } 4658 } else { /* from local to global -> put data in coarse right hand side */ 4659 from = pcbddc->vec1_P; 4660 to = pcbddc->coarse_vec; 4661 } 4662 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4663 PetscFunctionReturn(0); 4664 } 4665 4666 #undef __FUNCT__ 4667 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4668 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4669 { 4670 PetscErrorCode ierr; 4671 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4672 PetscScalar *array; 4673 Vec from,to; 4674 4675 PetscFunctionBegin; 4676 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4677 from = pcbddc->coarse_vec; 4678 to = pcbddc->vec1_P; 4679 } else { /* from local to global -> put data in coarse right hand side */ 4680 from = pcbddc->vec1_P; 4681 to = pcbddc->coarse_vec; 4682 } 4683 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4684 if (smode == SCATTER_FORWARD) { 4685 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4686 Vec tvec; 4687 4688 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4689 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4690 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4691 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4692 } 4693 } else { 4694 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4695 ierr = VecResetArray(from);CHKERRQ(ierr); 4696 } 4697 } 4698 PetscFunctionReturn(0); 4699 } 4700 4701 /* uncomment for testing purposes */ 4702 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4703 #undef __FUNCT__ 4704 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4705 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4706 { 4707 PetscErrorCode ierr; 4708 PC_IS* pcis = (PC_IS*)(pc->data); 4709 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4710 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4711 /* one and zero */ 4712 PetscScalar one=1.0,zero=0.0; 4713 /* space to store constraints and their local indices */ 4714 PetscScalar *constraints_data; 4715 PetscInt *constraints_idxs,*constraints_idxs_B; 4716 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4717 PetscInt *constraints_n; 4718 /* iterators */ 4719 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4720 /* BLAS integers */ 4721 PetscBLASInt lwork,lierr; 4722 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4723 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4724 /* reuse */ 4725 PetscInt olocal_primal_size,olocal_primal_size_cc; 4726 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4727 /* change of basis */ 4728 PetscBool qr_needed; 4729 PetscBT change_basis,qr_needed_idx; 4730 /* auxiliary stuff */ 4731 PetscInt *nnz,*is_indices; 4732 PetscInt ncc; 4733 /* some quantities */ 4734 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4735 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4736 4737 PetscFunctionBegin; 4738 /* Destroy Mat objects computed previously */ 4739 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4740 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4741 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4742 /* save info on constraints from previous setup (if any) */ 4743 olocal_primal_size = pcbddc->local_primal_size; 4744 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4745 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4746 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4747 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4748 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4749 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4750 4751 if (!pcbddc->adaptive_selection) { 4752 IS ISForVertices,*ISForFaces,*ISForEdges; 4753 MatNullSpace nearnullsp; 4754 const Vec *nearnullvecs; 4755 Vec *localnearnullsp; 4756 PetscScalar *array; 4757 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4758 PetscBool nnsp_has_cnst; 4759 /* LAPACK working arrays for SVD or POD */ 4760 PetscBool skip_lapack,boolforchange; 4761 PetscScalar *work; 4762 PetscReal *singular_vals; 4763 #if defined(PETSC_USE_COMPLEX) 4764 PetscReal *rwork; 4765 #endif 4766 #if defined(PETSC_MISSING_LAPACK_GESVD) 4767 PetscScalar *temp_basis,*correlation_mat; 4768 #else 4769 PetscBLASInt dummy_int=1; 4770 PetscScalar dummy_scalar=1.; 4771 #endif 4772 4773 /* Get index sets for faces, edges and vertices from graph */ 4774 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4775 /* print some info */ 4776 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4777 PetscInt nv; 4778 4779 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4780 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4781 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4782 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4783 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4785 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4786 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4787 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4788 } 4789 4790 /* free unneeded index sets */ 4791 if (!pcbddc->use_vertices) { 4792 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4793 } 4794 if (!pcbddc->use_edges) { 4795 for (i=0;i<n_ISForEdges;i++) { 4796 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4797 } 4798 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4799 n_ISForEdges = 0; 4800 } 4801 if (!pcbddc->use_faces) { 4802 for (i=0;i<n_ISForFaces;i++) { 4803 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4804 } 4805 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4806 n_ISForFaces = 0; 4807 } 4808 4809 /* check if near null space is attached to global mat */ 4810 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4811 if (nearnullsp) { 4812 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4813 /* remove any stored info */ 4814 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4815 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4816 /* store information for BDDC solver reuse */ 4817 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4818 pcbddc->onearnullspace = nearnullsp; 4819 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4820 for (i=0;i<nnsp_size;i++) { 4821 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4822 } 4823 } else { /* if near null space is not provided BDDC uses constants by default */ 4824 nnsp_size = 0; 4825 nnsp_has_cnst = PETSC_TRUE; 4826 } 4827 /* get max number of constraints on a single cc */ 4828 max_constraints = nnsp_size; 4829 if (nnsp_has_cnst) max_constraints++; 4830 4831 /* 4832 Evaluate maximum storage size needed by the procedure 4833 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4834 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4835 There can be multiple constraints per connected component 4836 */ 4837 n_vertices = 0; 4838 if (ISForVertices) { 4839 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4840 } 4841 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4842 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4843 4844 total_counts = n_ISForFaces+n_ISForEdges; 4845 total_counts *= max_constraints; 4846 total_counts += n_vertices; 4847 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4848 4849 total_counts = 0; 4850 max_size_of_constraint = 0; 4851 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4852 IS used_is; 4853 if (i<n_ISForEdges) { 4854 used_is = ISForEdges[i]; 4855 } else { 4856 used_is = ISForFaces[i-n_ISForEdges]; 4857 } 4858 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4859 total_counts += j; 4860 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4861 } 4862 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4863 4864 /* get local part of global near null space vectors */ 4865 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4866 for (k=0;k<nnsp_size;k++) { 4867 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4868 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4869 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4870 } 4871 4872 /* whether or not to skip lapack calls */ 4873 skip_lapack = PETSC_TRUE; 4874 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4875 4876 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4877 if (!skip_lapack) { 4878 PetscScalar temp_work; 4879 4880 #if defined(PETSC_MISSING_LAPACK_GESVD) 4881 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4882 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4883 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4884 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4885 #if defined(PETSC_USE_COMPLEX) 4886 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4887 #endif 4888 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4889 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4890 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4891 lwork = -1; 4892 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4893 #if !defined(PETSC_USE_COMPLEX) 4894 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4895 #else 4896 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4897 #endif 4898 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4899 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4900 #else /* on missing GESVD */ 4901 /* SVD */ 4902 PetscInt max_n,min_n; 4903 max_n = max_size_of_constraint; 4904 min_n = max_constraints; 4905 if (max_size_of_constraint < max_constraints) { 4906 min_n = max_size_of_constraint; 4907 max_n = max_constraints; 4908 } 4909 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4910 #if defined(PETSC_USE_COMPLEX) 4911 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4912 #endif 4913 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4914 lwork = -1; 4915 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4916 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4917 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4918 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4919 #if !defined(PETSC_USE_COMPLEX) 4920 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4921 #else 4922 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4923 #endif 4924 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4925 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4926 #endif /* on missing GESVD */ 4927 /* Allocate optimal workspace */ 4928 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4929 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4930 } 4931 /* Now we can loop on constraining sets */ 4932 total_counts = 0; 4933 constraints_idxs_ptr[0] = 0; 4934 constraints_data_ptr[0] = 0; 4935 /* vertices */ 4936 if (n_vertices) { 4937 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4938 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4939 for (i=0;i<n_vertices;i++) { 4940 constraints_n[total_counts] = 1; 4941 constraints_data[total_counts] = 1.0; 4942 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4943 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4944 total_counts++; 4945 } 4946 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4947 n_vertices = total_counts; 4948 } 4949 4950 /* edges and faces */ 4951 total_counts_cc = total_counts; 4952 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4953 IS used_is; 4954 PetscBool idxs_copied = PETSC_FALSE; 4955 4956 if (ncc<n_ISForEdges) { 4957 used_is = ISForEdges[ncc]; 4958 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4959 } else { 4960 used_is = ISForFaces[ncc-n_ISForEdges]; 4961 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4962 } 4963 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4964 4965 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4966 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4967 /* change of basis should not be performed on local periodic nodes */ 4968 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4969 if (nnsp_has_cnst) { 4970 PetscScalar quad_value; 4971 4972 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4973 idxs_copied = PETSC_TRUE; 4974 4975 if (!pcbddc->use_nnsp_true) { 4976 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4977 } else { 4978 quad_value = 1.0; 4979 } 4980 for (j=0;j<size_of_constraint;j++) { 4981 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4982 } 4983 temp_constraints++; 4984 total_counts++; 4985 } 4986 for (k=0;k<nnsp_size;k++) { 4987 PetscReal real_value; 4988 PetscScalar *ptr_to_data; 4989 4990 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4991 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4992 for (j=0;j<size_of_constraint;j++) { 4993 ptr_to_data[j] = array[is_indices[j]]; 4994 } 4995 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4996 /* check if array is null on the connected component */ 4997 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4998 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 4999 if (real_value > 0.0) { /* keep indices and values */ 5000 temp_constraints++; 5001 total_counts++; 5002 if (!idxs_copied) { 5003 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5004 idxs_copied = PETSC_TRUE; 5005 } 5006 } 5007 } 5008 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5009 valid_constraints = temp_constraints; 5010 if (!pcbddc->use_nnsp_true && temp_constraints) { 5011 if (temp_constraints == 1) { /* just normalize the constraint */ 5012 PetscScalar norm,*ptr_to_data; 5013 5014 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5015 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5016 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5017 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5018 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5019 } else { /* perform SVD */ 5020 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5021 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5022 5023 #if defined(PETSC_MISSING_LAPACK_GESVD) 5024 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5025 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5026 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5027 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5028 from that computed using LAPACKgesvd 5029 -> This is due to a different computation of eigenvectors in LAPACKheev 5030 -> The quality of the POD-computed basis will be the same */ 5031 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5032 /* Store upper triangular part of correlation matrix */ 5033 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5034 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5035 for (j=0;j<temp_constraints;j++) { 5036 for (k=0;k<j+1;k++) { 5037 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5038 } 5039 } 5040 /* compute eigenvalues and eigenvectors of correlation matrix */ 5041 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5042 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5043 #if !defined(PETSC_USE_COMPLEX) 5044 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5045 #else 5046 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5047 #endif 5048 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5049 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5050 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5051 j = 0; 5052 while (j < temp_constraints && singular_vals[j] < tol) j++; 5053 total_counts = total_counts-j; 5054 valid_constraints = temp_constraints-j; 5055 /* scale and copy POD basis into used quadrature memory */ 5056 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5057 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5058 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5059 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5060 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5061 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5062 if (j<temp_constraints) { 5063 PetscInt ii; 5064 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5065 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5066 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5067 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5068 for (k=0;k<temp_constraints-j;k++) { 5069 for (ii=0;ii<size_of_constraint;ii++) { 5070 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5071 } 5072 } 5073 } 5074 #else /* on missing GESVD */ 5075 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5076 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5078 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5079 #if !defined(PETSC_USE_COMPLEX) 5080 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5081 #else 5082 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5083 #endif 5084 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5085 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5086 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5087 k = temp_constraints; 5088 if (k > size_of_constraint) k = size_of_constraint; 5089 j = 0; 5090 while (j < k && singular_vals[k-j-1] < tol) j++; 5091 valid_constraints = k-j; 5092 total_counts = total_counts-temp_constraints+valid_constraints; 5093 #endif /* on missing GESVD */ 5094 } 5095 } 5096 /* update pointers information */ 5097 if (valid_constraints) { 5098 constraints_n[total_counts_cc] = valid_constraints; 5099 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5100 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5101 /* set change_of_basis flag */ 5102 if (boolforchange) { 5103 PetscBTSet(change_basis,total_counts_cc); 5104 } 5105 total_counts_cc++; 5106 } 5107 } 5108 /* free workspace */ 5109 if (!skip_lapack) { 5110 ierr = PetscFree(work);CHKERRQ(ierr); 5111 #if defined(PETSC_USE_COMPLEX) 5112 ierr = PetscFree(rwork);CHKERRQ(ierr); 5113 #endif 5114 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5115 #if defined(PETSC_MISSING_LAPACK_GESVD) 5116 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5117 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5118 #endif 5119 } 5120 for (k=0;k<nnsp_size;k++) { 5121 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5122 } 5123 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5124 /* free index sets of faces, edges and vertices */ 5125 for (i=0;i<n_ISForFaces;i++) { 5126 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5127 } 5128 if (n_ISForFaces) { 5129 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5130 } 5131 for (i=0;i<n_ISForEdges;i++) { 5132 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5133 } 5134 if (n_ISForEdges) { 5135 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5136 } 5137 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5138 } else { 5139 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5140 5141 total_counts = 0; 5142 n_vertices = 0; 5143 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5144 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5145 } 5146 max_constraints = 0; 5147 total_counts_cc = 0; 5148 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5149 total_counts += pcbddc->adaptive_constraints_n[i]; 5150 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5151 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5152 } 5153 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5154 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5155 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5156 constraints_data = pcbddc->adaptive_constraints_data; 5157 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5158 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5159 total_counts_cc = 0; 5160 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5161 if (pcbddc->adaptive_constraints_n[i]) { 5162 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5163 } 5164 } 5165 #if 0 5166 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5167 for (i=0;i<total_counts_cc;i++) { 5168 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5169 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5170 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5171 printf(" %d",constraints_idxs[j]); 5172 } 5173 printf("\n"); 5174 printf("number of cc: %d\n",constraints_n[i]); 5175 } 5176 for (i=0;i<n_vertices;i++) { 5177 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5178 } 5179 for (i=0;i<sub_schurs->n_subs;i++) { 5180 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5181 } 5182 #endif 5183 5184 max_size_of_constraint = 0; 5185 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5186 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5187 /* Change of basis */ 5188 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5189 if (pcbddc->use_change_of_basis) { 5190 for (i=0;i<sub_schurs->n_subs;i++) { 5191 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5192 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5193 } 5194 } 5195 } 5196 } 5197 pcbddc->local_primal_size = total_counts; 5198 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5199 5200 /* map constraints_idxs in boundary numbering */ 5201 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5202 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5203 5204 /* Create constraint matrix */ 5205 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5206 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5207 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5208 5209 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5210 /* determine if a QR strategy is needed for change of basis */ 5211 qr_needed = PETSC_FALSE; 5212 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5213 total_primal_vertices=0; 5214 pcbddc->local_primal_size_cc = 0; 5215 for (i=0;i<total_counts_cc;i++) { 5216 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5217 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5218 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5219 pcbddc->local_primal_size_cc += 1; 5220 } else if (PetscBTLookup(change_basis,i)) { 5221 for (k=0;k<constraints_n[i];k++) { 5222 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5223 } 5224 pcbddc->local_primal_size_cc += constraints_n[i]; 5225 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5226 PetscBTSet(qr_needed_idx,i); 5227 qr_needed = PETSC_TRUE; 5228 } 5229 } else { 5230 pcbddc->local_primal_size_cc += 1; 5231 } 5232 } 5233 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5234 pcbddc->n_vertices = total_primal_vertices; 5235 /* permute indices in order to have a sorted set of vertices */ 5236 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5237 5238 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5239 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5240 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5241 5242 /* nonzero structure of constraint matrix */ 5243 /* and get reference dof for local constraints */ 5244 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5245 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5246 5247 j = total_primal_vertices; 5248 total_counts = total_primal_vertices; 5249 cum = total_primal_vertices; 5250 for (i=n_vertices;i<total_counts_cc;i++) { 5251 if (!PetscBTLookup(change_basis,i)) { 5252 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5253 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5254 cum++; 5255 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5256 for (k=0;k<constraints_n[i];k++) { 5257 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5258 nnz[j+k] = size_of_constraint; 5259 } 5260 j += constraints_n[i]; 5261 } 5262 } 5263 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5264 ierr = PetscFree(nnz);CHKERRQ(ierr); 5265 5266 /* set values in constraint matrix */ 5267 for (i=0;i<total_primal_vertices;i++) { 5268 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5269 } 5270 total_counts = total_primal_vertices; 5271 for (i=n_vertices;i<total_counts_cc;i++) { 5272 if (!PetscBTLookup(change_basis,i)) { 5273 PetscInt *cols; 5274 5275 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5276 cols = constraints_idxs+constraints_idxs_ptr[i]; 5277 for (k=0;k<constraints_n[i];k++) { 5278 PetscInt row = total_counts+k; 5279 PetscScalar *vals; 5280 5281 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5282 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5283 } 5284 total_counts += constraints_n[i]; 5285 } 5286 } 5287 /* assembling */ 5288 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5289 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5290 5291 /* 5292 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5293 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5294 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5295 */ 5296 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5297 if (pcbddc->use_change_of_basis) { 5298 /* dual and primal dofs on a single cc */ 5299 PetscInt dual_dofs,primal_dofs; 5300 /* working stuff for GEQRF */ 5301 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5302 PetscBLASInt lqr_work; 5303 /* working stuff for UNGQR */ 5304 PetscScalar *gqr_work,lgqr_work_t; 5305 PetscBLASInt lgqr_work; 5306 /* working stuff for TRTRS */ 5307 PetscScalar *trs_rhs; 5308 PetscBLASInt Blas_NRHS; 5309 /* pointers for values insertion into change of basis matrix */ 5310 PetscInt *start_rows,*start_cols; 5311 PetscScalar *start_vals; 5312 /* working stuff for values insertion */ 5313 PetscBT is_primal; 5314 PetscInt *aux_primal_numbering_B; 5315 /* matrix sizes */ 5316 PetscInt global_size,local_size; 5317 /* temporary change of basis */ 5318 Mat localChangeOfBasisMatrix; 5319 /* extra space for debugging */ 5320 PetscScalar *dbg_work; 5321 5322 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5323 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5324 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5325 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5326 /* nonzeros for local mat */ 5327 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5328 if (!pcbddc->benign_change || pcbddc->fake_change) { 5329 for (i=0;i<pcis->n;i++) nnz[i]=1; 5330 } else { 5331 const PetscInt *ii; 5332 PetscInt n; 5333 PetscBool flg_row; 5334 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5335 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5336 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5337 } 5338 for (i=n_vertices;i<total_counts_cc;i++) { 5339 if (PetscBTLookup(change_basis,i)) { 5340 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5341 if (PetscBTLookup(qr_needed_idx,i)) { 5342 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5343 } else { 5344 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5345 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5346 } 5347 } 5348 } 5349 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5350 ierr = PetscFree(nnz);CHKERRQ(ierr); 5351 /* Set interior change in the matrix */ 5352 if (!pcbddc->benign_change || pcbddc->fake_change) { 5353 for (i=0;i<pcis->n;i++) { 5354 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5355 } 5356 } else { 5357 const PetscInt *ii,*jj; 5358 PetscScalar *aa; 5359 PetscInt n; 5360 PetscBool flg_row; 5361 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5362 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5363 for (i=0;i<n;i++) { 5364 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5365 } 5366 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5367 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5368 } 5369 5370 if (pcbddc->dbg_flag) { 5371 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5372 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5373 } 5374 5375 5376 /* Now we loop on the constraints which need a change of basis */ 5377 /* 5378 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5379 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5380 5381 Basic blocks of change of basis matrix T computed by 5382 5383 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5384 5385 | 1 0 ... 0 s_1/S | 5386 | 0 1 ... 0 s_2/S | 5387 | ... | 5388 | 0 ... 1 s_{n-1}/S | 5389 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5390 5391 with S = \sum_{i=1}^n s_i^2 5392 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5393 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5394 5395 - QR decomposition of constraints otherwise 5396 */ 5397 if (qr_needed) { 5398 /* space to store Q */ 5399 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5400 /* array to store scaling factors for reflectors */ 5401 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5402 /* first we issue queries for optimal work */ 5403 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5404 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5405 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5406 lqr_work = -1; 5407 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5408 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5409 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5410 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5411 lgqr_work = -1; 5412 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5413 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5414 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5415 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5416 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5417 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5418 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5419 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5420 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5421 /* array to store rhs and solution of triangular solver */ 5422 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5423 /* allocating workspace for check */ 5424 if (pcbddc->dbg_flag) { 5425 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5426 } 5427 } 5428 /* array to store whether a node is primal or not */ 5429 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5430 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5431 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5432 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5433 for (i=0;i<total_primal_vertices;i++) { 5434 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5435 } 5436 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5437 5438 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5439 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5440 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5441 if (PetscBTLookup(change_basis,total_counts)) { 5442 /* get constraint info */ 5443 primal_dofs = constraints_n[total_counts]; 5444 dual_dofs = size_of_constraint-primal_dofs; 5445 5446 if (pcbddc->dbg_flag) { 5447 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5448 } 5449 5450 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5451 5452 /* copy quadrature constraints for change of basis check */ 5453 if (pcbddc->dbg_flag) { 5454 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5455 } 5456 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5457 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5458 5459 /* compute QR decomposition of constraints */ 5460 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5461 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5462 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5463 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5464 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5465 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5466 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5467 5468 /* explictly compute R^-T */ 5469 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5470 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5471 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5472 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5474 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5475 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5476 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5477 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5478 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5479 5480 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5481 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5483 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5484 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5485 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5486 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5487 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5488 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5489 5490 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5491 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5492 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5493 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5494 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5495 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5496 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5497 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5499 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5500 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5501 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5502 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5503 5504 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5505 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5506 /* insert cols for primal dofs */ 5507 for (j=0;j<primal_dofs;j++) { 5508 start_vals = &qr_basis[j*size_of_constraint]; 5509 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5510 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5511 } 5512 /* insert cols for dual dofs */ 5513 for (j=0,k=0;j<dual_dofs;k++) { 5514 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5515 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5516 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5517 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5518 j++; 5519 } 5520 } 5521 5522 /* check change of basis */ 5523 if (pcbddc->dbg_flag) { 5524 PetscInt ii,jj; 5525 PetscBool valid_qr=PETSC_TRUE; 5526 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5527 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5529 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5531 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5532 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5533 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5534 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5535 for (jj=0;jj<size_of_constraint;jj++) { 5536 for (ii=0;ii<primal_dofs;ii++) { 5537 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5538 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5539 } 5540 } 5541 if (!valid_qr) { 5542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5543 for (jj=0;jj<size_of_constraint;jj++) { 5544 for (ii=0;ii<primal_dofs;ii++) { 5545 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5546 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5547 } 5548 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5549 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5550 } 5551 } 5552 } 5553 } else { 5554 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5555 } 5556 } 5557 } else { /* simple transformation block */ 5558 PetscInt row,col; 5559 PetscScalar val,norm; 5560 5561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5562 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5563 for (j=0;j<size_of_constraint;j++) { 5564 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5565 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5566 if (!PetscBTLookup(is_primal,row_B)) { 5567 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5568 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5569 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5570 } else { 5571 for (k=0;k<size_of_constraint;k++) { 5572 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5573 if (row != col) { 5574 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5575 } else { 5576 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5577 } 5578 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5579 } 5580 } 5581 } 5582 if (pcbddc->dbg_flag) { 5583 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5584 } 5585 } 5586 } else { 5587 if (pcbddc->dbg_flag) { 5588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5589 } 5590 } 5591 } 5592 5593 /* free workspace */ 5594 if (qr_needed) { 5595 if (pcbddc->dbg_flag) { 5596 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5597 } 5598 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5599 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5600 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5601 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5602 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5603 } 5604 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5605 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5606 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5607 5608 /* assembling of global change of variable */ 5609 if (!pcbddc->fake_change) { 5610 Mat tmat; 5611 PetscInt bs; 5612 5613 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5614 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5615 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5616 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5617 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5618 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5619 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5620 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5621 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5622 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5623 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5624 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5625 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5626 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5627 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5628 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5629 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5630 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5631 5632 /* check */ 5633 if (pcbddc->dbg_flag) { 5634 PetscReal error; 5635 Vec x,x_change; 5636 5637 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5638 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5639 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5640 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5641 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5642 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5643 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5644 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5645 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5646 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5647 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5648 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5649 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5650 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5651 ierr = VecDestroy(&x);CHKERRQ(ierr); 5652 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5653 } 5654 /* adapt sub_schurs computed (if any) */ 5655 if (pcbddc->use_deluxe_scaling) { 5656 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5657 5658 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5659 if (sub_schurs && sub_schurs->S_Ej_all) { 5660 Mat S_new,tmat; 5661 IS is_all_N,is_V_Sall = NULL; 5662 5663 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5664 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5665 if (pcbddc->deluxe_zerorows) { 5666 ISLocalToGlobalMapping NtoSall; 5667 IS is_V; 5668 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5669 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5670 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5671 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5672 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5673 } 5674 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5675 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5676 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5677 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5678 if (pcbddc->deluxe_zerorows) { 5679 const PetscScalar *array; 5680 const PetscInt *idxs_V,*idxs_all; 5681 PetscInt i,n_V; 5682 5683 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5684 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5685 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5686 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5687 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5688 for (i=0;i<n_V;i++) { 5689 PetscScalar val; 5690 PetscInt idx; 5691 5692 idx = idxs_V[i]; 5693 val = array[idxs_all[idxs_V[i]]]; 5694 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5695 } 5696 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5697 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5699 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5700 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5701 } 5702 sub_schurs->S_Ej_all = S_new; 5703 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5704 if (sub_schurs->sum_S_Ej_all) { 5705 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5706 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5707 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5708 if (pcbddc->deluxe_zerorows) { 5709 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5710 } 5711 sub_schurs->sum_S_Ej_all = S_new; 5712 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5713 } 5714 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5715 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5716 } 5717 /* destroy any change of basis context in sub_schurs */ 5718 if (sub_schurs && sub_schurs->change) { 5719 PetscInt i; 5720 5721 for (i=0;i<sub_schurs->n_subs;i++) { 5722 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5723 } 5724 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5725 } 5726 } 5727 if (pcbddc->switch_static) { /* need to save the local change */ 5728 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5729 } else { 5730 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5731 } 5732 /* determine if any process has changed the pressures locally */ 5733 pcbddc->change_interior = pcbddc->benign_have_null; 5734 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5735 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5736 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5737 pcbddc->use_qr_single = qr_needed; 5738 } 5739 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5740 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5741 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5742 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5743 } else { 5744 Mat benign_global = NULL; 5745 if (pcbddc->benign_have_null) { 5746 Mat tmat; 5747 5748 pcbddc->change_interior = PETSC_TRUE; 5749 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5750 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5751 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5752 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5753 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5754 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5755 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5756 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5757 if (pcbddc->benign_change) { 5758 Mat M; 5759 5760 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5761 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5762 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5763 ierr = MatDestroy(&M);CHKERRQ(ierr); 5764 } else { 5765 Mat eye; 5766 PetscScalar *array; 5767 5768 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5769 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5770 for (i=0;i<pcis->n;i++) { 5771 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5772 } 5773 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5774 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5775 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5776 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5777 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5778 } 5779 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5780 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5781 } 5782 if (pcbddc->user_ChangeOfBasisMatrix) { 5783 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5784 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5785 } else if (pcbddc->benign_have_null) { 5786 pcbddc->ChangeOfBasisMatrix = benign_global; 5787 } 5788 } 5789 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5790 IS is_global; 5791 const PetscInt *gidxs; 5792 5793 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5794 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5795 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5796 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5797 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5798 } 5799 } 5800 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5801 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5802 } 5803 5804 if (!pcbddc->fake_change) { 5805 /* add pressure dofs to set of primal nodes for numbering purposes */ 5806 for (i=0;i<pcbddc->benign_n;i++) { 5807 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5808 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5809 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5810 pcbddc->local_primal_size_cc++; 5811 pcbddc->local_primal_size++; 5812 } 5813 5814 /* check if a new primal space has been introduced (also take into account benign trick) */ 5815 pcbddc->new_primal_space_local = PETSC_TRUE; 5816 if (olocal_primal_size == pcbddc->local_primal_size) { 5817 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5818 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5819 if (!pcbddc->new_primal_space_local) { 5820 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5821 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5822 } 5823 } 5824 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5825 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5826 } 5827 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5828 5829 /* flush dbg viewer */ 5830 if (pcbddc->dbg_flag) { 5831 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5832 } 5833 5834 /* free workspace */ 5835 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5836 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5837 if (!pcbddc->adaptive_selection) { 5838 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5839 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5840 } else { 5841 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5842 pcbddc->adaptive_constraints_idxs_ptr, 5843 pcbddc->adaptive_constraints_data_ptr, 5844 pcbddc->adaptive_constraints_idxs, 5845 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5846 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5847 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5848 } 5849 PetscFunctionReturn(0); 5850 } 5851 5852 #undef __FUNCT__ 5853 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5854 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5855 { 5856 ISLocalToGlobalMapping map; 5857 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5858 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5859 PetscInt ierr,i,N; 5860 5861 PetscFunctionBegin; 5862 if (pcbddc->graphanalyzed) PetscFunctionReturn(0); 5863 /* Reset previously computed graph */ 5864 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5865 /* Init local Graph struct */ 5866 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5867 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5868 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5869 5870 /* Check validity of the csr graph passed in by the user */ 5871 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5872 5873 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5874 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5875 PetscInt *xadj,*adjncy; 5876 PetscInt nvtxs; 5877 PetscBool flg_row=PETSC_FALSE; 5878 5879 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5880 if (flg_row) { 5881 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5882 pcbddc->computed_rowadj = PETSC_TRUE; 5883 } 5884 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5885 } 5886 if (pcbddc->dbg_flag) { 5887 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5888 } 5889 5890 /* Setup of Graph */ 5891 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5892 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5893 5894 /* attach info on disconnected subdomains if present */ 5895 if (pcbddc->n_local_subs) { 5896 PetscInt *local_subs; 5897 5898 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5899 for (i=0;i<pcbddc->n_local_subs;i++) { 5900 const PetscInt *idxs; 5901 PetscInt nl,j; 5902 5903 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5904 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5905 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5906 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5907 } 5908 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5909 pcbddc->mat_graph->local_subs = local_subs; 5910 } 5911 5912 /* Graph's connected components analysis */ 5913 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5914 5915 /* set flag indicating analysis has been done */ 5916 pcbddc->graphanalyzed = PETSC_TRUE; 5917 PetscFunctionReturn(0); 5918 } 5919 5920 #undef __FUNCT__ 5921 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5922 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5923 { 5924 PetscInt i,j; 5925 PetscScalar *alphas; 5926 PetscErrorCode ierr; 5927 5928 PetscFunctionBegin; 5929 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5930 for (i=0;i<n;i++) { 5931 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5932 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5933 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5934 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5935 } 5936 ierr = PetscFree(alphas);CHKERRQ(ierr); 5937 PetscFunctionReturn(0); 5938 } 5939 5940 #undef __FUNCT__ 5941 #define __FUNCT__ "MatISGetSubassemblingPattern" 5942 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5943 { 5944 Mat A; 5945 PetscInt n_neighs,*neighs,*n_shared,**shared; 5946 PetscMPIInt size,rank,color; 5947 PetscInt *xadj,*adjncy; 5948 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5949 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5950 PetscInt void_procs,*procs_candidates = NULL; 5951 PetscInt xadj_count, *count; 5952 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5953 PetscSubcomm psubcomm; 5954 MPI_Comm subcomm; 5955 PetscErrorCode ierr; 5956 5957 PetscFunctionBegin; 5958 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5959 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5960 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5961 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5962 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5963 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5964 5965 if (have_void) *have_void = PETSC_FALSE; 5966 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5967 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5968 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5969 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5970 im_active = !!(n); 5971 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5972 void_procs = size - active_procs; 5973 /* get ranks of of non-active processes in mat communicator */ 5974 if (void_procs) { 5975 PetscInt ncand; 5976 5977 if (have_void) *have_void = PETSC_TRUE; 5978 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5979 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5980 for (i=0,ncand=0;i<size;i++) { 5981 if (!procs_candidates[i]) { 5982 procs_candidates[ncand++] = i; 5983 } 5984 } 5985 /* force n_subdomains to be not greater that the number of non-active processes */ 5986 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5987 } 5988 5989 /* number of subdomains requested greater than active processes -> just shift the matrix 5990 number of subdomains requested 1 -> send to master or first candidate in voids */ 5991 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5992 PetscInt issize,isidx,dest; 5993 if (*n_subdomains == 1) dest = 0; 5994 else dest = rank; 5995 if (im_active) { 5996 issize = 1; 5997 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5998 isidx = procs_candidates[dest]; 5999 } else { 6000 isidx = dest; 6001 } 6002 } else { 6003 issize = 0; 6004 isidx = -1; 6005 } 6006 *n_subdomains = active_procs; 6007 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6008 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6009 PetscFunctionReturn(0); 6010 } 6011 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6012 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6013 threshold = PetscMax(threshold,2); 6014 6015 /* Get info on mapping */ 6016 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6017 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6018 6019 /* build local CSR graph of subdomains' connectivity */ 6020 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6021 xadj[0] = 0; 6022 xadj[1] = PetscMax(n_neighs-1,0); 6023 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6024 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6025 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6026 for (i=1;i<n_neighs;i++) 6027 for (j=0;j<n_shared[i];j++) 6028 count[shared[i][j]] += 1; 6029 6030 xadj_count = 0; 6031 for (i=1;i<n_neighs;i++) { 6032 for (j=0;j<n_shared[i];j++) { 6033 if (count[shared[i][j]] < threshold) { 6034 adjncy[xadj_count] = neighs[i]; 6035 adjncy_wgt[xadj_count] = n_shared[i]; 6036 xadj_count++; 6037 break; 6038 } 6039 } 6040 } 6041 xadj[1] = xadj_count; 6042 ierr = PetscFree(count);CHKERRQ(ierr); 6043 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6044 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6045 6046 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6047 6048 /* Restrict work on active processes only */ 6049 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6050 if (void_procs) { 6051 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6052 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6053 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6054 subcomm = PetscSubcommChild(psubcomm); 6055 } else { 6056 psubcomm = NULL; 6057 subcomm = PetscObjectComm((PetscObject)mat); 6058 } 6059 6060 v_wgt = NULL; 6061 if (!color) { 6062 ierr = PetscFree(xadj);CHKERRQ(ierr); 6063 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6064 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6065 } else { 6066 Mat subdomain_adj; 6067 IS new_ranks,new_ranks_contig; 6068 MatPartitioning partitioner; 6069 PetscInt rstart=0,rend=0; 6070 PetscInt *is_indices,*oldranks; 6071 PetscMPIInt size; 6072 PetscBool aggregate; 6073 6074 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6075 if (void_procs) { 6076 PetscInt prank = rank; 6077 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6078 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6079 for (i=0;i<xadj[1];i++) { 6080 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6081 } 6082 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6083 } else { 6084 oldranks = NULL; 6085 } 6086 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6087 if (aggregate) { /* TODO: all this part could be made more efficient */ 6088 PetscInt lrows,row,ncols,*cols; 6089 PetscMPIInt nrank; 6090 PetscScalar *vals; 6091 6092 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6093 lrows = 0; 6094 if (nrank<redprocs) { 6095 lrows = size/redprocs; 6096 if (nrank<size%redprocs) lrows++; 6097 } 6098 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6099 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6100 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6101 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6102 row = nrank; 6103 ncols = xadj[1]-xadj[0]; 6104 cols = adjncy; 6105 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6106 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6107 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6108 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6109 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6110 ierr = PetscFree(xadj);CHKERRQ(ierr); 6111 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6112 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6113 ierr = PetscFree(vals);CHKERRQ(ierr); 6114 if (use_vwgt) { 6115 Vec v; 6116 const PetscScalar *array; 6117 PetscInt nl; 6118 6119 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6120 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6121 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6122 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6123 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6124 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6125 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6126 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6127 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6128 ierr = VecDestroy(&v);CHKERRQ(ierr); 6129 } 6130 } else { 6131 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6132 if (use_vwgt) { 6133 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6134 v_wgt[0] = local_size; 6135 } 6136 } 6137 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6138 6139 /* Partition */ 6140 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6141 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6142 if (v_wgt) { 6143 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6144 } 6145 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6146 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6147 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6148 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6149 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6150 6151 /* renumber new_ranks to avoid "holes" in new set of processors */ 6152 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6153 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6154 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6155 if (!aggregate) { 6156 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6157 #if defined(PETSC_USE_DEBUG) 6158 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6159 #endif 6160 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6161 } else if (oldranks) { 6162 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6163 } else { 6164 ranks_send_to_idx[0] = is_indices[0]; 6165 } 6166 } else { 6167 PetscInt idxs[1]; 6168 PetscMPIInt tag; 6169 MPI_Request *reqs; 6170 6171 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6172 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6173 for (i=rstart;i<rend;i++) { 6174 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6175 } 6176 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6177 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6178 ierr = PetscFree(reqs);CHKERRQ(ierr); 6179 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6180 #if defined(PETSC_USE_DEBUG) 6181 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6182 #endif 6183 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6184 } else if (oldranks) { 6185 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6186 } else { 6187 ranks_send_to_idx[0] = idxs[0]; 6188 } 6189 } 6190 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6191 /* clean up */ 6192 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6193 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6194 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6195 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6196 } 6197 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6198 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6199 6200 /* assemble parallel IS for sends */ 6201 i = 1; 6202 if (!color) i=0; 6203 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6204 PetscFunctionReturn(0); 6205 } 6206 6207 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6208 6209 #undef __FUNCT__ 6210 #define __FUNCT__ "MatISSubassemble" 6211 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6212 { 6213 Mat local_mat; 6214 IS is_sends_internal; 6215 PetscInt rows,cols,new_local_rows; 6216 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6217 PetscBool ismatis,isdense,newisdense,destroy_mat; 6218 ISLocalToGlobalMapping l2gmap; 6219 PetscInt* l2gmap_indices; 6220 const PetscInt* is_indices; 6221 MatType new_local_type; 6222 /* buffers */ 6223 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6224 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6225 PetscInt *recv_buffer_idxs_local; 6226 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6227 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6228 /* MPI */ 6229 MPI_Comm comm,comm_n; 6230 PetscSubcomm subcomm; 6231 PetscMPIInt n_sends,n_recvs,commsize; 6232 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6233 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6234 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6235 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6236 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6237 PetscErrorCode ierr; 6238 6239 PetscFunctionBegin; 6240 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6241 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6242 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6243 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6244 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6245 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6246 PetscValidLogicalCollectiveBool(mat,reuse,6); 6247 PetscValidLogicalCollectiveInt(mat,nis,8); 6248 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6249 if (nvecs) { 6250 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6251 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6252 } 6253 /* further checks */ 6254 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6255 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6256 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6257 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6258 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6259 if (reuse && *mat_n) { 6260 PetscInt mrows,mcols,mnrows,mncols; 6261 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6262 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6263 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6264 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6265 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6266 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6267 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6268 } 6269 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6270 PetscValidLogicalCollectiveInt(mat,bs,0); 6271 6272 /* prepare IS for sending if not provided */ 6273 if (!is_sends) { 6274 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6275 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6276 } else { 6277 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6278 is_sends_internal = is_sends; 6279 } 6280 6281 /* get comm */ 6282 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6283 6284 /* compute number of sends */ 6285 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6286 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6287 6288 /* compute number of receives */ 6289 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6290 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6291 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6292 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6293 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6294 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6295 ierr = PetscFree(iflags);CHKERRQ(ierr); 6296 6297 /* restrict comm if requested */ 6298 subcomm = 0; 6299 destroy_mat = PETSC_FALSE; 6300 if (restrict_comm) { 6301 PetscMPIInt color,subcommsize; 6302 6303 color = 0; 6304 if (restrict_full) { 6305 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6306 } else { 6307 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6308 } 6309 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6310 subcommsize = commsize - subcommsize; 6311 /* check if reuse has been requested */ 6312 if (reuse) { 6313 if (*mat_n) { 6314 PetscMPIInt subcommsize2; 6315 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6316 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6317 comm_n = PetscObjectComm((PetscObject)*mat_n); 6318 } else { 6319 comm_n = PETSC_COMM_SELF; 6320 } 6321 } else { /* MAT_INITIAL_MATRIX */ 6322 PetscMPIInt rank; 6323 6324 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6325 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6326 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6327 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6328 comm_n = PetscSubcommChild(subcomm); 6329 } 6330 /* flag to destroy *mat_n if not significative */ 6331 if (color) destroy_mat = PETSC_TRUE; 6332 } else { 6333 comm_n = comm; 6334 } 6335 6336 /* prepare send/receive buffers */ 6337 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6338 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6339 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6340 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6341 if (nis) { 6342 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6343 } 6344 6345 /* Get data from local matrices */ 6346 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6347 /* TODO: See below some guidelines on how to prepare the local buffers */ 6348 /* 6349 send_buffer_vals should contain the raw values of the local matrix 6350 send_buffer_idxs should contain: 6351 - MatType_PRIVATE type 6352 - PetscInt size_of_l2gmap 6353 - PetscInt global_row_indices[size_of_l2gmap] 6354 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6355 */ 6356 else { 6357 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6358 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6359 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6360 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6361 send_buffer_idxs[1] = i; 6362 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6363 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6364 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6365 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6366 for (i=0;i<n_sends;i++) { 6367 ilengths_vals[is_indices[i]] = len*len; 6368 ilengths_idxs[is_indices[i]] = len+2; 6369 } 6370 } 6371 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6372 /* additional is (if any) */ 6373 if (nis) { 6374 PetscMPIInt psum; 6375 PetscInt j; 6376 for (j=0,psum=0;j<nis;j++) { 6377 PetscInt plen; 6378 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6379 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6380 psum += len+1; /* indices + lenght */ 6381 } 6382 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6383 for (j=0,psum=0;j<nis;j++) { 6384 PetscInt plen; 6385 const PetscInt *is_array_idxs; 6386 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6387 send_buffer_idxs_is[psum] = plen; 6388 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6389 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6390 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6391 psum += plen+1; /* indices + lenght */ 6392 } 6393 for (i=0;i<n_sends;i++) { 6394 ilengths_idxs_is[is_indices[i]] = psum; 6395 } 6396 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6397 } 6398 6399 buf_size_idxs = 0; 6400 buf_size_vals = 0; 6401 buf_size_idxs_is = 0; 6402 buf_size_vecs = 0; 6403 for (i=0;i<n_recvs;i++) { 6404 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6405 buf_size_vals += (PetscInt)olengths_vals[i]; 6406 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6407 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6408 } 6409 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6410 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6411 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6412 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6413 6414 /* get new tags for clean communications */ 6415 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6416 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6417 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6418 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6419 6420 /* allocate for requests */ 6421 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6422 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6423 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6424 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6425 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6426 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6427 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6428 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6429 6430 /* communications */ 6431 ptr_idxs = recv_buffer_idxs; 6432 ptr_vals = recv_buffer_vals; 6433 ptr_idxs_is = recv_buffer_idxs_is; 6434 ptr_vecs = recv_buffer_vecs; 6435 for (i=0;i<n_recvs;i++) { 6436 source_dest = onodes[i]; 6437 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6438 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6439 ptr_idxs += olengths_idxs[i]; 6440 ptr_vals += olengths_vals[i]; 6441 if (nis) { 6442 source_dest = onodes_is[i]; 6443 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6444 ptr_idxs_is += olengths_idxs_is[i]; 6445 } 6446 if (nvecs) { 6447 source_dest = onodes[i]; 6448 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6449 ptr_vecs += olengths_idxs[i]-2; 6450 } 6451 } 6452 for (i=0;i<n_sends;i++) { 6453 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6454 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6455 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6456 if (nis) { 6457 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6458 } 6459 if (nvecs) { 6460 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6461 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6462 } 6463 } 6464 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6465 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6466 6467 /* assemble new l2g map */ 6468 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6469 ptr_idxs = recv_buffer_idxs; 6470 new_local_rows = 0; 6471 for (i=0;i<n_recvs;i++) { 6472 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6473 ptr_idxs += olengths_idxs[i]; 6474 } 6475 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6476 ptr_idxs = recv_buffer_idxs; 6477 new_local_rows = 0; 6478 for (i=0;i<n_recvs;i++) { 6479 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6480 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6481 ptr_idxs += olengths_idxs[i]; 6482 } 6483 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6484 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6485 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6486 6487 /* infer new local matrix type from received local matrices type */ 6488 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6489 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6490 if (n_recvs) { 6491 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6492 ptr_idxs = recv_buffer_idxs; 6493 for (i=0;i<n_recvs;i++) { 6494 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6495 new_local_type_private = MATAIJ_PRIVATE; 6496 break; 6497 } 6498 ptr_idxs += olengths_idxs[i]; 6499 } 6500 switch (new_local_type_private) { 6501 case MATDENSE_PRIVATE: 6502 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6503 new_local_type = MATSEQAIJ; 6504 bs = 1; 6505 } else { /* if I receive only 1 dense matrix */ 6506 new_local_type = MATSEQDENSE; 6507 bs = 1; 6508 } 6509 break; 6510 case MATAIJ_PRIVATE: 6511 new_local_type = MATSEQAIJ; 6512 bs = 1; 6513 break; 6514 case MATBAIJ_PRIVATE: 6515 new_local_type = MATSEQBAIJ; 6516 break; 6517 case MATSBAIJ_PRIVATE: 6518 new_local_type = MATSEQSBAIJ; 6519 break; 6520 default: 6521 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6522 break; 6523 } 6524 } else { /* by default, new_local_type is seqdense */ 6525 new_local_type = MATSEQDENSE; 6526 bs = 1; 6527 } 6528 6529 /* create MATIS object if needed */ 6530 if (!reuse) { 6531 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6532 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6533 } else { 6534 /* it also destroys the local matrices */ 6535 if (*mat_n) { 6536 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6537 } else { /* this is a fake object */ 6538 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6539 } 6540 } 6541 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6542 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6543 6544 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6545 6546 /* Global to local map of received indices */ 6547 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6548 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6549 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6550 6551 /* restore attributes -> type of incoming data and its size */ 6552 buf_size_idxs = 0; 6553 for (i=0;i<n_recvs;i++) { 6554 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6555 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6556 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6557 } 6558 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6559 6560 /* set preallocation */ 6561 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6562 if (!newisdense) { 6563 PetscInt *new_local_nnz=0; 6564 6565 ptr_idxs = recv_buffer_idxs_local; 6566 if (n_recvs) { 6567 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6568 } 6569 for (i=0;i<n_recvs;i++) { 6570 PetscInt j; 6571 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6572 for (j=0;j<*(ptr_idxs+1);j++) { 6573 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6574 } 6575 } else { 6576 /* TODO */ 6577 } 6578 ptr_idxs += olengths_idxs[i]; 6579 } 6580 if (new_local_nnz) { 6581 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6582 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6583 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6584 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6585 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6586 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6587 } else { 6588 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6589 } 6590 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6591 } else { 6592 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6593 } 6594 6595 /* set values */ 6596 ptr_vals = recv_buffer_vals; 6597 ptr_idxs = recv_buffer_idxs_local; 6598 for (i=0;i<n_recvs;i++) { 6599 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6600 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6601 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6602 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6603 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6604 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6605 } else { 6606 /* TODO */ 6607 } 6608 ptr_idxs += olengths_idxs[i]; 6609 ptr_vals += olengths_vals[i]; 6610 } 6611 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6612 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6613 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6614 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6615 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6616 6617 #if 0 6618 if (!restrict_comm) { /* check */ 6619 Vec lvec,rvec; 6620 PetscReal infty_error; 6621 6622 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6623 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6624 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6625 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6626 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6627 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6628 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6629 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6630 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6631 } 6632 #endif 6633 6634 /* assemble new additional is (if any) */ 6635 if (nis) { 6636 PetscInt **temp_idxs,*count_is,j,psum; 6637 6638 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6639 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6640 ptr_idxs = recv_buffer_idxs_is; 6641 psum = 0; 6642 for (i=0;i<n_recvs;i++) { 6643 for (j=0;j<nis;j++) { 6644 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6645 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6646 psum += plen; 6647 ptr_idxs += plen+1; /* shift pointer to received data */ 6648 } 6649 } 6650 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6651 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6652 for (i=1;i<nis;i++) { 6653 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6654 } 6655 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6656 ptr_idxs = recv_buffer_idxs_is; 6657 for (i=0;i<n_recvs;i++) { 6658 for (j=0;j<nis;j++) { 6659 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6660 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6661 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6662 ptr_idxs += plen+1; /* shift pointer to received data */ 6663 } 6664 } 6665 for (i=0;i<nis;i++) { 6666 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6667 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6668 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6669 } 6670 ierr = PetscFree(count_is);CHKERRQ(ierr); 6671 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6672 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6673 } 6674 /* free workspace */ 6675 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6676 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6677 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6678 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6679 if (isdense) { 6680 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6681 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6682 } else { 6683 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6684 } 6685 if (nis) { 6686 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6687 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6688 } 6689 6690 if (nvecs) { 6691 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6692 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6693 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6694 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6695 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6696 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6697 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6698 /* set values */ 6699 ptr_vals = recv_buffer_vecs; 6700 ptr_idxs = recv_buffer_idxs_local; 6701 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6702 for (i=0;i<n_recvs;i++) { 6703 PetscInt j; 6704 for (j=0;j<*(ptr_idxs+1);j++) { 6705 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6706 } 6707 ptr_idxs += olengths_idxs[i]; 6708 ptr_vals += olengths_idxs[i]-2; 6709 } 6710 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6711 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6712 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6713 } 6714 6715 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6716 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6717 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6718 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6719 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6720 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6721 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6722 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6723 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6724 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6725 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6726 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6727 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6728 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6729 ierr = PetscFree(onodes);CHKERRQ(ierr); 6730 if (nis) { 6731 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6732 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6733 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6734 } 6735 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6736 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6737 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6738 for (i=0;i<nis;i++) { 6739 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6740 } 6741 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6742 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6743 } 6744 *mat_n = NULL; 6745 } 6746 PetscFunctionReturn(0); 6747 } 6748 6749 /* temporary hack into ksp private data structure */ 6750 #include <petsc/private/kspimpl.h> 6751 6752 #undef __FUNCT__ 6753 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6754 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6755 { 6756 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6757 PC_IS *pcis = (PC_IS*)pc->data; 6758 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6759 Mat coarsedivudotp = NULL; 6760 MatNullSpace CoarseNullSpace = NULL; 6761 ISLocalToGlobalMapping coarse_islg; 6762 IS coarse_is,*isarray; 6763 PetscInt i,im_active=-1,active_procs=-1; 6764 PetscInt nis,nisdofs,nisneu,nisvert; 6765 PC pc_temp; 6766 PCType coarse_pc_type; 6767 KSPType coarse_ksp_type; 6768 PetscBool multilevel_requested,multilevel_allowed; 6769 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6770 Mat t_coarse_mat_is; 6771 PetscInt ncoarse; 6772 PetscBool compute_vecs = PETSC_FALSE; 6773 PetscScalar *array; 6774 MatReuse coarse_mat_reuse; 6775 PetscBool restr, full_restr, have_void; 6776 PetscErrorCode ierr; 6777 6778 PetscFunctionBegin; 6779 /* Assign global numbering to coarse dofs */ 6780 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6781 PetscInt ocoarse_size; 6782 compute_vecs = PETSC_TRUE; 6783 ocoarse_size = pcbddc->coarse_size; 6784 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6785 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6786 /* see if we can avoid some work */ 6787 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6788 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6789 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6790 PC pc; 6791 PetscBool isbddc; 6792 6793 /* temporary workaround since PCBDDC does not have a reset method so far */ 6794 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6795 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6796 if (isbddc) { 6797 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6798 } else { 6799 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6800 } 6801 coarse_reuse = PETSC_FALSE; 6802 } else { /* we can safely reuse already computed coarse matrix */ 6803 coarse_reuse = PETSC_TRUE; 6804 } 6805 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6806 coarse_reuse = PETSC_FALSE; 6807 } 6808 /* reset any subassembling information */ 6809 if (!coarse_reuse || pcbddc->recompute_topography) { 6810 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6811 } 6812 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6813 coarse_reuse = PETSC_TRUE; 6814 } 6815 /* assemble coarse matrix */ 6816 if (coarse_reuse && pcbddc->coarse_ksp) { 6817 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6818 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6819 coarse_mat_reuse = MAT_REUSE_MATRIX; 6820 } else { 6821 coarse_mat = NULL; 6822 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6823 } 6824 6825 /* creates temporary l2gmap and IS for coarse indexes */ 6826 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6827 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6828 6829 /* creates temporary MATIS object for coarse matrix */ 6830 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6831 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6832 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6833 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6834 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6835 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6836 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6837 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6838 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6839 6840 /* count "active" (i.e. with positive local size) and "void" processes */ 6841 im_active = !!(pcis->n); 6842 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6843 6844 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6845 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6846 /* full_restr : just use the receivers from the subassembling pattern */ 6847 coarse_mat_is = NULL; 6848 multilevel_allowed = PETSC_FALSE; 6849 multilevel_requested = PETSC_FALSE; 6850 full_restr = PETSC_TRUE; 6851 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6852 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6853 if (multilevel_requested) { 6854 ncoarse = active_procs/pcbddc->coarsening_ratio; 6855 restr = PETSC_FALSE; 6856 full_restr = PETSC_FALSE; 6857 } else { 6858 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6859 restr = PETSC_TRUE; 6860 full_restr = PETSC_TRUE; 6861 } 6862 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6863 ncoarse = PetscMax(1,ncoarse); 6864 if (!pcbddc->coarse_subassembling) { 6865 if (pcbddc->coarsening_ratio > 1) { 6866 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6867 } else { 6868 PetscMPIInt size,rank; 6869 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6870 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6871 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6872 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6873 } 6874 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6875 PetscInt psum; 6876 PetscMPIInt size; 6877 if (pcbddc->coarse_ksp) psum = 1; 6878 else psum = 0; 6879 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6880 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6881 if (ncoarse < size) have_void = PETSC_TRUE; 6882 } 6883 /* determine if we can go multilevel */ 6884 if (multilevel_requested) { 6885 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6886 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6887 } 6888 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6889 6890 /* dump subassembling pattern */ 6891 if (pcbddc->dbg_flag && multilevel_allowed) { 6892 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6893 } 6894 6895 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6896 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6897 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6898 const PetscInt *idxs; 6899 ISLocalToGlobalMapping tmap; 6900 6901 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6902 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6903 /* allocate space for temporary storage */ 6904 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6905 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6906 /* allocate for IS array */ 6907 nisdofs = pcbddc->n_ISForDofsLocal; 6908 nisneu = !!pcbddc->NeumannBoundariesLocal; 6909 nisvert = 0; /* nisvert is not used */ 6910 nis = nisdofs + nisneu + nisvert; 6911 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6912 /* dofs splitting */ 6913 for (i=0;i<nisdofs;i++) { 6914 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6915 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6916 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6917 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6918 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6919 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6920 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6921 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6922 } 6923 /* neumann boundaries */ 6924 if (pcbddc->NeumannBoundariesLocal) { 6925 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6926 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6927 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6928 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6929 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6930 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6931 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6932 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6933 } 6934 /* free memory */ 6935 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6936 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6937 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6938 } else { 6939 nis = 0; 6940 nisdofs = 0; 6941 nisneu = 0; 6942 nisvert = 0; 6943 isarray = NULL; 6944 } 6945 /* destroy no longer needed map */ 6946 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6947 6948 /* subassemble */ 6949 if (multilevel_allowed) { 6950 Vec vp[1]; 6951 PetscInt nvecs = 0; 6952 PetscBool reuse,reuser; 6953 6954 if (coarse_mat) reuse = PETSC_TRUE; 6955 else reuse = PETSC_FALSE; 6956 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6957 vp[0] = NULL; 6958 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6959 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6960 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6961 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6962 nvecs = 1; 6963 6964 if (pcbddc->divudotp) { 6965 Mat B,loc_divudotp; 6966 Vec v,p; 6967 IS dummy; 6968 PetscInt np; 6969 6970 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6971 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6972 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6973 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6974 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6975 ierr = VecSet(p,1.);CHKERRQ(ierr); 6976 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6977 ierr = VecDestroy(&p);CHKERRQ(ierr); 6978 ierr = MatDestroy(&B);CHKERRQ(ierr); 6979 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6980 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6981 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6982 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6983 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6984 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6985 ierr = VecDestroy(&v);CHKERRQ(ierr); 6986 } 6987 } 6988 if (reuser) { 6989 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6990 } else { 6991 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6992 } 6993 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6994 PetscScalar *arraym,*arrayv; 6995 PetscInt nl; 6996 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6997 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6998 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6999 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7000 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7001 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7002 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7003 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7004 } else { 7005 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7006 } 7007 } else { 7008 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 7009 } 7010 if (coarse_mat_is || coarse_mat) { 7011 PetscMPIInt size; 7012 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 7013 if (!multilevel_allowed) { 7014 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7015 } else { 7016 Mat A; 7017 7018 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7019 if (coarse_mat_is) { 7020 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7021 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7022 coarse_mat = coarse_mat_is; 7023 } 7024 /* be sure we don't have MatSeqDENSE as local mat */ 7025 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7026 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7027 } 7028 } 7029 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7030 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7031 7032 /* create local to global scatters for coarse problem */ 7033 if (compute_vecs) { 7034 PetscInt lrows; 7035 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7036 if (coarse_mat) { 7037 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7038 } else { 7039 lrows = 0; 7040 } 7041 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7042 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7043 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7044 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7045 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7046 } 7047 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7048 7049 /* set defaults for coarse KSP and PC */ 7050 if (multilevel_allowed) { 7051 coarse_ksp_type = KSPRICHARDSON; 7052 coarse_pc_type = PCBDDC; 7053 } else { 7054 coarse_ksp_type = KSPPREONLY; 7055 coarse_pc_type = PCREDUNDANT; 7056 } 7057 7058 /* print some info if requested */ 7059 if (pcbddc->dbg_flag) { 7060 if (!multilevel_allowed) { 7061 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7062 if (multilevel_requested) { 7063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7064 } else if (pcbddc->max_levels) { 7065 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7066 } 7067 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7068 } 7069 } 7070 7071 /* create the coarse KSP object only once with defaults */ 7072 if (coarse_mat) { 7073 PetscViewer dbg_viewer = NULL; 7074 if (pcbddc->dbg_flag) { 7075 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7076 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7077 } 7078 if (!pcbddc->coarse_ksp) { 7079 char prefix[256],str_level[16]; 7080 size_t len; 7081 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7082 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7083 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7084 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7085 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7086 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7087 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7088 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7089 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7090 /* prefix */ 7091 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7092 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7093 if (!pcbddc->current_level) { 7094 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7095 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7096 } else { 7097 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7098 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7099 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7100 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7101 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7102 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7103 } 7104 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7105 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7106 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7107 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7108 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7109 /* allow user customization */ 7110 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7111 } 7112 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7113 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7114 if (nisdofs) { 7115 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7116 for (i=0;i<nisdofs;i++) { 7117 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7118 } 7119 } 7120 if (nisneu) { 7121 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7122 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7123 } 7124 if (nisvert) { 7125 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7126 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7127 } 7128 7129 /* get some info after set from options */ 7130 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7131 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7132 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7133 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7134 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7135 isbddc = PETSC_FALSE; 7136 } 7137 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7138 if (isredundant) { 7139 KSP inner_ksp; 7140 PC inner_pc; 7141 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7142 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7143 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7144 } 7145 7146 /* parameters which miss an API */ 7147 if (isbddc) { 7148 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7149 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7150 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7151 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7152 if (pcbddc_coarse->benign_saddle_point) { 7153 Mat coarsedivudotp_is; 7154 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7155 IS row,col; 7156 const PetscInt *gidxs; 7157 PetscInt n,st,M,N; 7158 7159 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7160 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7161 st = st-n; 7162 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7163 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7164 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7165 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7166 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7167 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7168 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7169 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7170 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7171 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7172 ierr = ISDestroy(&row);CHKERRQ(ierr); 7173 ierr = ISDestroy(&col);CHKERRQ(ierr); 7174 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7175 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7176 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7177 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7178 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7179 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7180 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7181 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7182 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7183 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7184 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7185 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7186 } 7187 } 7188 7189 /* propagate symmetry info of coarse matrix */ 7190 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7191 if (pc->pmat->symmetric_set) { 7192 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7193 } 7194 if (pc->pmat->hermitian_set) { 7195 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7196 } 7197 if (pc->pmat->spd_set) { 7198 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7199 } 7200 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7201 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7202 } 7203 /* set operators */ 7204 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7205 if (pcbddc->dbg_flag) { 7206 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7207 } 7208 } 7209 ierr = PetscFree(isarray);CHKERRQ(ierr); 7210 #if 0 7211 { 7212 PetscViewer viewer; 7213 char filename[256]; 7214 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7215 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7216 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7217 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7218 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7219 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7220 } 7221 #endif 7222 7223 if (pcbddc->coarse_ksp) { 7224 Vec crhs,csol; 7225 7226 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7227 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7228 if (!csol) { 7229 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7230 } 7231 if (!crhs) { 7232 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7233 } 7234 } 7235 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7236 7237 /* compute null space for coarse solver if the benign trick has been requested */ 7238 if (pcbddc->benign_null) { 7239 7240 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7241 for (i=0;i<pcbddc->benign_n;i++) { 7242 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7243 } 7244 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7245 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7246 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7247 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7248 if (coarse_mat) { 7249 Vec nullv; 7250 PetscScalar *array,*array2; 7251 PetscInt nl; 7252 7253 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7254 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7255 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7256 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7257 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7258 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7259 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7260 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7261 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7262 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7263 } 7264 } 7265 7266 if (pcbddc->coarse_ksp) { 7267 PetscBool ispreonly; 7268 7269 if (CoarseNullSpace) { 7270 PetscBool isnull; 7271 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7272 if (isnull) { 7273 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7274 } 7275 /* TODO: add local nullspaces (if any) */ 7276 } 7277 /* setup coarse ksp */ 7278 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7279 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7280 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7281 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7282 KSP check_ksp; 7283 KSPType check_ksp_type; 7284 PC check_pc; 7285 Vec check_vec,coarse_vec; 7286 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7287 PetscInt its; 7288 PetscBool compute_eigs; 7289 PetscReal *eigs_r,*eigs_c; 7290 PetscInt neigs; 7291 const char *prefix; 7292 7293 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7294 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7295 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7296 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7297 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7298 /* prevent from setup unneeded object */ 7299 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7300 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7301 if (ispreonly) { 7302 check_ksp_type = KSPPREONLY; 7303 compute_eigs = PETSC_FALSE; 7304 } else { 7305 check_ksp_type = KSPGMRES; 7306 compute_eigs = PETSC_TRUE; 7307 } 7308 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7309 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7310 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7311 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7312 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7313 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7314 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7315 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7316 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7317 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7318 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7319 /* create random vec */ 7320 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7321 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7322 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7323 /* solve coarse problem */ 7324 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7325 /* set eigenvalue estimation if preonly has not been requested */ 7326 if (compute_eigs) { 7327 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7328 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7329 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7330 if (neigs) { 7331 lambda_max = eigs_r[neigs-1]; 7332 lambda_min = eigs_r[0]; 7333 if (pcbddc->use_coarse_estimates) { 7334 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7335 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7336 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7337 } 7338 } 7339 } 7340 } 7341 7342 /* check coarse problem residual error */ 7343 if (pcbddc->dbg_flag) { 7344 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7345 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7346 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7347 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7348 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7349 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7350 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7351 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7352 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7353 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7354 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7355 if (CoarseNullSpace) { 7356 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7357 } 7358 if (compute_eigs) { 7359 PetscReal lambda_max_s,lambda_min_s; 7360 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7361 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7362 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7363 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7364 for (i=0;i<neigs;i++) { 7365 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7366 } 7367 } 7368 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7369 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7370 } 7371 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7372 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7373 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7374 if (compute_eigs) { 7375 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7376 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7377 } 7378 } 7379 } 7380 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7381 /* print additional info */ 7382 if (pcbddc->dbg_flag) { 7383 /* waits until all processes reaches this point */ 7384 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7385 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7386 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7387 } 7388 7389 /* free memory */ 7390 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7391 PetscFunctionReturn(0); 7392 } 7393 7394 #undef __FUNCT__ 7395 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7396 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7397 { 7398 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7399 PC_IS* pcis = (PC_IS*)pc->data; 7400 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7401 IS subset,subset_mult,subset_n; 7402 PetscInt local_size,coarse_size=0; 7403 PetscInt *local_primal_indices=NULL; 7404 const PetscInt *t_local_primal_indices; 7405 PetscErrorCode ierr; 7406 7407 PetscFunctionBegin; 7408 /* Compute global number of coarse dofs */ 7409 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7410 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7411 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7412 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7413 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7414 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7415 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7416 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7417 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7418 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7419 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7420 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7421 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7422 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7423 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7424 7425 /* check numbering */ 7426 if (pcbddc->dbg_flag) { 7427 PetscScalar coarsesum,*array,*array2; 7428 PetscInt i; 7429 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7430 7431 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7432 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7433 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7434 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7435 /* counter */ 7436 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7437 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7438 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7439 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7440 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7441 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7442 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7443 for (i=0;i<pcbddc->local_primal_size;i++) { 7444 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7445 } 7446 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7447 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7448 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7449 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7450 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7451 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7452 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7453 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7454 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7455 for (i=0;i<pcis->n;i++) { 7456 if (array[i] != 0.0 && array[i] != array2[i]) { 7457 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7458 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7459 set_error = PETSC_TRUE; 7460 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7461 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7462 } 7463 } 7464 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7465 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7466 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7467 for (i=0;i<pcis->n;i++) { 7468 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7469 } 7470 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7471 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7472 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7473 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7474 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7475 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7476 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7477 PetscInt *gidxs; 7478 7479 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7480 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7482 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7483 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7484 for (i=0;i<pcbddc->local_primal_size;i++) { 7485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7486 } 7487 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7488 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7489 } 7490 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7491 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7492 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7493 } 7494 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7495 /* get back data */ 7496 *coarse_size_n = coarse_size; 7497 *local_primal_indices_n = local_primal_indices; 7498 PetscFunctionReturn(0); 7499 } 7500 7501 #undef __FUNCT__ 7502 #define __FUNCT__ "PCBDDCGlobalToLocal" 7503 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7504 { 7505 IS localis_t; 7506 PetscInt i,lsize,*idxs,n; 7507 PetscScalar *vals; 7508 PetscErrorCode ierr; 7509 7510 PetscFunctionBegin; 7511 /* get indices in local ordering exploiting local to global map */ 7512 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7513 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7514 for (i=0;i<lsize;i++) vals[i] = 1.0; 7515 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7516 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7517 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7518 if (idxs) { /* multilevel guard */ 7519 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7520 } 7521 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7522 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7523 ierr = PetscFree(vals);CHKERRQ(ierr); 7524 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7525 /* now compute set in local ordering */ 7526 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7527 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7528 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7529 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7530 for (i=0,lsize=0;i<n;i++) { 7531 if (PetscRealPart(vals[i]) > 0.5) { 7532 lsize++; 7533 } 7534 } 7535 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7536 for (i=0,lsize=0;i<n;i++) { 7537 if (PetscRealPart(vals[i]) > 0.5) { 7538 idxs[lsize++] = i; 7539 } 7540 } 7541 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7542 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7543 *localis = localis_t; 7544 PetscFunctionReturn(0); 7545 } 7546 7547 #undef __FUNCT__ 7548 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7549 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7550 { 7551 PC_IS *pcis=(PC_IS*)pc->data; 7552 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7553 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7554 Mat S_j; 7555 PetscInt *used_xadj,*used_adjncy; 7556 PetscBool free_used_adj; 7557 PetscErrorCode ierr; 7558 7559 PetscFunctionBegin; 7560 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7561 free_used_adj = PETSC_FALSE; 7562 if (pcbddc->sub_schurs_layers == -1) { 7563 used_xadj = NULL; 7564 used_adjncy = NULL; 7565 } else { 7566 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7567 used_xadj = pcbddc->mat_graph->xadj; 7568 used_adjncy = pcbddc->mat_graph->adjncy; 7569 } else if (pcbddc->computed_rowadj) { 7570 used_xadj = pcbddc->mat_graph->xadj; 7571 used_adjncy = pcbddc->mat_graph->adjncy; 7572 } else { 7573 PetscBool flg_row=PETSC_FALSE; 7574 const PetscInt *xadj,*adjncy; 7575 PetscInt nvtxs; 7576 7577 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7578 if (flg_row) { 7579 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7580 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7581 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7582 free_used_adj = PETSC_TRUE; 7583 } else { 7584 pcbddc->sub_schurs_layers = -1; 7585 used_xadj = NULL; 7586 used_adjncy = NULL; 7587 } 7588 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7589 } 7590 } 7591 7592 /* setup sub_schurs data */ 7593 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7594 if (!sub_schurs->schur_explicit) { 7595 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7596 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7597 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7598 } else { 7599 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7600 PetscBool isseqaij,need_change = PETSC_FALSE; 7601 PetscInt benign_n; 7602 Mat change = NULL; 7603 Vec scaling = NULL; 7604 IS change_primal = NULL; 7605 7606 if (!pcbddc->use_vertices && reuse_solvers) { 7607 PetscInt n_vertices; 7608 7609 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7610 reuse_solvers = (PetscBool)!n_vertices; 7611 } 7612 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7613 if (!isseqaij) { 7614 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7615 if (matis->A == pcbddc->local_mat) { 7616 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7617 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7618 } else { 7619 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7620 } 7621 } 7622 if (!pcbddc->benign_change_explicit) { 7623 benign_n = pcbddc->benign_n; 7624 } else { 7625 benign_n = 0; 7626 } 7627 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7628 We need a global reduction to avoid possible deadlocks. 7629 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7630 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7631 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7632 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7633 need_change = (PetscBool)(!need_change); 7634 } 7635 /* If the user defines additional constraints, we import them here. 7636 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7637 if (need_change) { 7638 PC_IS *pcisf; 7639 PC_BDDC *pcbddcf; 7640 PC pcf; 7641 7642 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7643 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7644 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7645 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7646 /* hacks */ 7647 pcisf = (PC_IS*)pcf->data; 7648 pcisf->is_B_local = pcis->is_B_local; 7649 pcisf->vec1_N = pcis->vec1_N; 7650 pcisf->BtoNmap = pcis->BtoNmap; 7651 pcisf->n = pcis->n; 7652 pcisf->n_B = pcis->n_B; 7653 pcbddcf = (PC_BDDC*)pcf->data; 7654 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7655 pcbddcf->mat_graph = pcbddc->mat_graph; 7656 pcbddcf->use_faces = PETSC_TRUE; 7657 pcbddcf->use_change_of_basis = PETSC_TRUE; 7658 pcbddcf->use_change_on_faces = PETSC_TRUE; 7659 pcbddcf->use_qr_single = PETSC_TRUE; 7660 pcbddcf->fake_change = PETSC_TRUE; 7661 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7662 /* store information on primal vertices and change of basis (in local numbering) */ 7663 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7664 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7665 change = pcbddcf->ConstraintMatrix; 7666 pcbddcf->ConstraintMatrix = NULL; 7667 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7668 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7669 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7670 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7671 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7672 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7673 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7674 pcf->ops->destroy = NULL; 7675 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7676 } 7677 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7678 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7679 ierr = MatDestroy(&change);CHKERRQ(ierr); 7680 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7681 } 7682 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7683 7684 /* free adjacency */ 7685 if (free_used_adj) { 7686 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7687 } 7688 PetscFunctionReturn(0); 7689 } 7690 7691 #undef __FUNCT__ 7692 #define __FUNCT__ "PCBDDCInitSubSchurs" 7693 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7694 { 7695 PC_IS *pcis=(PC_IS*)pc->data; 7696 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7697 PCBDDCGraph graph; 7698 PetscErrorCode ierr; 7699 7700 PetscFunctionBegin; 7701 /* attach interface graph for determining subsets */ 7702 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7703 IS verticesIS,verticescomm; 7704 PetscInt vsize,*idxs; 7705 7706 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7707 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7708 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7709 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7710 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7711 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7712 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7713 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7714 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7715 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7716 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7717 } else { 7718 graph = pcbddc->mat_graph; 7719 } 7720 /* print some info */ 7721 if (pcbddc->dbg_flag) { 7722 IS vertices; 7723 PetscInt nv,nedges,nfaces; 7724 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7725 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7726 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7727 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7728 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7729 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7730 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7731 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7732 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7733 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7734 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7735 } 7736 7737 /* sub_schurs init */ 7738 if (!pcbddc->sub_schurs) { 7739 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7740 } 7741 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7742 7743 /* free graph struct */ 7744 if (pcbddc->sub_schurs_rebuild) { 7745 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7746 } 7747 PetscFunctionReturn(0); 7748 } 7749 7750 #undef __FUNCT__ 7751 #define __FUNCT__ "PCBDDCCheckOperator" 7752 PetscErrorCode PCBDDCCheckOperator(PC pc) 7753 { 7754 PC_IS *pcis=(PC_IS*)pc->data; 7755 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7756 PetscErrorCode ierr; 7757 7758 PetscFunctionBegin; 7759 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7760 IS zerodiag = NULL; 7761 Mat S_j,B0_B=NULL; 7762 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7763 PetscScalar *p0_check,*array,*array2; 7764 PetscReal norm; 7765 PetscInt i; 7766 7767 /* B0 and B0_B */ 7768 if (zerodiag) { 7769 IS dummy; 7770 7771 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7772 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7773 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7774 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7775 } 7776 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7777 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7778 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7779 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7780 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7781 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7782 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7783 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7784 /* S_j */ 7785 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7786 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7787 7788 /* mimic vector in \widetilde{W}_\Gamma */ 7789 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7790 /* continuous in primal space */ 7791 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7792 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7793 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7794 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7795 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7796 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7797 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7798 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7799 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7800 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7801 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7802 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7803 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7804 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7805 7806 /* assemble rhs for coarse problem */ 7807 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7808 /* local with Schur */ 7809 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7810 if (zerodiag) { 7811 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7812 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7813 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7814 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7815 } 7816 /* sum on primal nodes the local contributions */ 7817 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7818 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7819 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7820 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7821 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7822 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7823 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7824 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7825 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7826 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7827 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7828 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7829 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7830 /* scale primal nodes (BDDC sums contibutions) */ 7831 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7832 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7833 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7834 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7835 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7836 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7837 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7838 /* global: \widetilde{B0}_B w_\Gamma */ 7839 if (zerodiag) { 7840 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7841 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7842 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7843 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7844 } 7845 /* BDDC */ 7846 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7847 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7848 7849 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7850 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7851 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7852 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7853 for (i=0;i<pcbddc->benign_n;i++) { 7854 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7855 } 7856 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7857 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7858 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7859 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7860 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7861 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7862 } 7863 PetscFunctionReturn(0); 7864 } 7865