1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 if (!esize) PetscFunctionReturn(0); 84 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 85 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 86 87 /* gradients */ 88 ptr = work + 5*esize; 89 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 90 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 91 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 92 ierr = MatDestroy(&GE);CHKERRQ(ierr); 93 94 /* constants */ 95 ptr += rsize*csize; 96 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 97 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 99 ierr = MatDestroy(&GE);CHKERRQ(ierr); 100 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 101 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PCBDDCNedelecSupport" 107 PetscErrorCode PCBDDCNedelecSupport(PC pc) 108 { 109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 110 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 111 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 112 MatNullSpace nnsp; 113 Vec tvec,*quads; 114 PetscSF sfv; 115 ISLocalToGlobalMapping el2g,vl2g,fl2g; 116 MPI_Comm comm; 117 IS lned,primals,allprimals,nedfieldlocal; 118 IS *eedges,*extrows,*extcols,*alleedges; 119 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee,bter; 120 PetscScalar *vals,*work; 121 PetscReal *rwork; 122 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 123 PetscInt ne,nv,Lv,order,n,field,rst; 124 PetscInt n_neigh,*neigh,*n_shared,**shared; 125 PetscInt i,j,extmem,cum,maxsize,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscInt *corners,*cedges; 129 #if defined(PETSC_USE_DEBUG) 130 PetscInt *emarks; 131 #endif 132 PetscBool print,eerr,done,lrc[2],conforming; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 /* test variable order code and print debug info TODO: to be removed */ 137 print = PETSC_FALSE; 138 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 139 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 140 141 /* Return to caller if there are no edges in the decomposition */ 142 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 143 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 144 ierr = ISLocalToGlobalMappingGetSize(el2g,&n);CHKERRQ(ierr); 145 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 146 lrc[0] = PETSC_FALSE; 147 for (i=0;i<n;i++) { 148 if (PetscRealPart(vals[i]) > 2.) { 149 lrc[0] = PETSC_TRUE; 150 break; 151 } 152 } 153 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 154 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 155 if (!lrc[1]) PetscFunctionReturn(0); 156 157 /* Get discrete gradient 158 If it is defined for a subset of dofs, assumes G is given in global ordering for all the dofs */ 159 G = pcbddc->discretegradient; 160 order = pcbddc->nedorder; 161 conforming = pcbddc->conforming; 162 field = pcbddc->nedfield; 163 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 164 if (pcbddc->n_ISForDofsLocal && field > -1) { 165 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 166 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 167 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 168 } else { 169 PetscBool testnedfield = PETSC_FALSE; 170 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 171 if (!testnedfield) { 172 ne = n; 173 nedfieldlocal = NULL; 174 } else { 175 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 176 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 177 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 178 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 179 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 180 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 181 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 182 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 183 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 184 for (i=0,cum=0;i<n;i++) { 185 if (matis->sf_leafdata[i] > 1) { 186 matis->sf_leafdata[cum++] = i; 187 } 188 } 189 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 190 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 191 } 192 } 193 194 if (nedfieldlocal) { /* merge with previous code when testing is done */ 195 IS is; 196 197 /* need to map from the local Nedelec field to local numbering */ 198 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 199 /* need to map from the local Nedelec field to global numbering */ 200 ierr = ISLocalToGlobalMappingApplyIS(el2g,nedfieldlocal,&is);CHKERRQ(ierr); 201 ierr = ISLocalToGlobalMappingCreateIS(is,&el2g);CHKERRQ(ierr); 202 ierr = ISDestroy(&is);CHKERRQ(ierr); 203 } else { 204 ierr = PetscObjectReference((PetscObject)el2g);CHKERRQ(ierr); 205 fl2g = NULL; 206 } 207 208 /* Sanity checks */ 209 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 210 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 211 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 212 213 /* Drop connections for interior edges (this modifies G) */ 214 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 215 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 216 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 217 if (nedfieldlocal) { 218 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 219 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 220 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 221 } else { 222 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 223 } 224 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 225 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 226 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 227 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 228 if (matis->sf_rootdata[i] < 2) { 229 matis->sf_rootdata[cum++] = i + rst; 230 } 231 } 232 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 233 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 234 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 235 236 /* Extract subdomain relevant rows of G */ 237 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 238 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 239 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 240 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 241 ierr = ISDestroy(&lned);CHKERRQ(ierr); 242 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 243 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 244 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 245 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 246 if (print) { 247 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 248 ierr = MatView(lG,NULL);CHKERRQ(ierr); 249 } 250 251 /* SF for nodal communications */ 252 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 253 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 254 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 256 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 257 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 258 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 259 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 260 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 261 262 /* Destroy temporary G created in MATIS format */ 263 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 264 265 /* Save lG */ 266 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 267 268 /* Analyze the edge-nodes connections (duplicate lG) */ 269 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 270 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 271 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 272 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 273 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 274 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 275 /* need to import the boundary specification to ensure the 276 proper detection of coarse edges' endpoints */ 277 if (pcbddc->DirichletBoundariesLocal) { 278 IS is; 279 280 if (fl2g) { 281 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 282 } else { 283 is = pcbddc->DirichletBoundariesLocal; 284 } 285 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 286 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 287 for (i=0;i<cum;i++) { 288 if (idxs[i] >= 0) { 289 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 290 } 291 } 292 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 293 if (fl2g) { 294 ierr = ISDestroy(&is);CHKERRQ(ierr); 295 } 296 } 297 if (pcbddc->NeumannBoundariesLocal) { 298 IS is; 299 300 if (fl2g) { 301 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 302 } else { 303 is = pcbddc->NeumannBoundariesLocal; 304 } 305 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 306 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 307 for (i=0;i<cum;i++) { 308 if (idxs[i] >= 0) { 309 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 310 } 311 } 312 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 313 if (fl2g) { 314 ierr = ISDestroy(&is);CHKERRQ(ierr); 315 } 316 } 317 318 /* need to remove coarse faces' dofs to ensure the 319 proper detection of coarse edges' endpoints */ 320 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 321 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 322 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 323 for (i=1;i<n_neigh;i++) 324 for (j=0;j<n_shared[i];j++) 325 marks[shared[i][j]]++; 326 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) { 328 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 329 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 330 } 331 } 332 333 if (!conforming) { 334 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 335 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 336 } 337 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 338 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 339 cum = 0; 340 for (i=0;i<ne;i++) { 341 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 342 if (!PetscBTLookup(btee,i)) { 343 marks[cum++] = i; 344 continue; 345 } 346 /* set badly connected edge dofs as primal */ 347 if (!conforming) { 348 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 349 marks[cum++] = i; 350 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 351 for (j=ii[i];j<ii[i+1];j++) { 352 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 353 } 354 } else { 355 /* every edge dofs should be connected trough a certain number of nodal dofs 356 to other edge dofs belonging to coarse edges 357 - at most 2 endpoints 358 - order-1 interior nodal dofs 359 - no undefined nodal dofs (nconn < order) 360 */ 361 PetscInt ends = 0,ints = 0, undef = 0; 362 for (j=ii[i];j<ii[i+1];j++) { 363 PetscInt v = jj[j],k; 364 PetscInt nconn = iit[v+1]-iit[v]; 365 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 366 if (nconn > order) ends++; 367 else if (nconn == order) ints++; 368 else undef++; 369 } 370 if (undef || ends > 2 || ints != order -1) { 371 marks[cum++] = i; 372 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 373 for (j=ii[i];j<ii[i+1];j++) { 374 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 375 } 376 } 377 } 378 } 379 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 380 if (!order && ii[i+1] != ii[i]) { 381 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 382 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 383 } 384 } 385 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 386 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 387 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 388 if (!conforming) { 389 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 390 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 391 } 392 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 393 /* identify splitpoints and corner candidates */ 394 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 395 if (print) { 396 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 397 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 398 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 399 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 400 } 401 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 402 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 403 for (i=0;i<nv;i++) { 404 PetscInt ord = order, test = ii[i+1]-ii[i]; 405 if (!order) { /* variable order */ 406 PetscReal vorder = 0.; 407 408 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 409 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 410 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 411 ord = 1; 412 } 413 #if defined(PETSC_USE_DEBUG) 414 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 415 #endif 416 if (test >= 3*ord) { /* splitpoints */ 417 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 418 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 419 } else if (test == ord) { 420 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 421 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 422 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 423 } else { 424 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 425 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 426 } 427 } 428 } 429 430 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 431 if (order != 1) { 432 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 433 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 434 for (i=0;i<nv;i++) { 435 if (PetscBTLookup(btvcand,i)) { 436 PetscBool found = PETSC_FALSE; 437 for (j=ii[i];j<ii[i+1] && !found;j++) { 438 PetscInt k,e = jj[j]; 439 if (PetscBTLookup(bte,e)) continue; 440 for (k=iit[e];k<iit[e+1];k++) { 441 PetscInt v = jjt[k]; 442 if (v != i && PetscBTLookup(btvcand,v)) { 443 found = PETSC_TRUE; 444 break; 445 } 446 } 447 } 448 if (!found) { 449 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 450 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 451 } else { 452 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 453 } 454 } 455 } 456 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 457 } 458 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 459 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 460 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 461 462 /* Get the local G^T explicitly */ 463 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 464 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 465 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 466 467 /* Mark interior nodal dofs */ 468 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 469 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 470 for (i=1;i<n_neigh;i++) { 471 for (j=0;j<n_shared[i];j++) { 472 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 473 } 474 } 475 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 476 477 /* communicate corners and splitpoints */ 478 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 479 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 480 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 481 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 482 483 if (print) { 484 IS tbz; 485 486 cum = 0; 487 for (i=0;i<nv;i++) 488 if (sfvleaves[i]) 489 vmarks[cum++] = i; 490 491 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 492 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 493 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 494 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 495 } 496 497 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 498 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 499 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 500 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 501 502 /* Zero rows of lGt corresponding to identified corners 503 and interior nodal dofs */ 504 cum = 0; 505 for (i=0;i<nv;i++) { 506 if (sfvleaves[i]) { 507 vmarks[cum++] = i; 508 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 509 } 510 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 511 } 512 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 513 if (print) { 514 IS tbz; 515 516 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 517 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 518 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 519 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 520 } 521 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 522 ierr = PetscFree(vmarks);CHKERRQ(ierr); 523 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 524 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 525 526 /* Recompute G */ 527 ierr = MatDestroy(&lG);CHKERRQ(ierr); 528 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 529 if (print) { 530 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 531 ierr = MatView(lG,NULL);CHKERRQ(ierr); 532 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 533 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 534 } 535 536 /* Get primal dofs (if any) */ 537 cum = 0; 538 for (i=0;i<ne;i++) { 539 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 540 } 541 if (fl2g) { 542 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 543 } 544 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 545 if (print) { 546 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 547 ierr = ISView(primals,NULL);CHKERRQ(ierr); 548 } 549 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 550 /* TODO: what if the user passed in some of them ? */ 551 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 552 ierr = ISDestroy(&primals);CHKERRQ(ierr); 553 554 /* Compute edge connectivity */ 555 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 556 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 557 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 558 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 559 if (fl2g) { 560 PetscBT btf; 561 PetscInt *iia,*jja,*iiu,*jju; 562 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 563 564 /* create CSR for all local dofs */ 565 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 566 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 567 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 568 iiu = pcbddc->mat_graph->xadj; 569 jju = pcbddc->mat_graph->adjncy; 570 } else if (pcbddc->use_local_adj) { 571 rest = PETSC_TRUE; 572 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 573 } else { 574 free = PETSC_TRUE; 575 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 576 iiu[0] = 0; 577 for (i=0;i<n;i++) { 578 iiu[i+1] = i+1; 579 jju[i] = -1; 580 } 581 } 582 583 /* import sizes of CSR */ 584 iia[0] = 0; 585 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 586 587 /* overwrite entries corresponding to the Nedelec field */ 588 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 589 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 590 for (i=0;i<ne;i++) { 591 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 592 iia[idxs[i]+1] = ii[i+1]-ii[i]; 593 } 594 595 /* iia in CSR */ 596 for (i=0;i<n;i++) iia[i+1] += iia[i]; 597 598 /* jja in CSR */ 599 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 600 for (i=0;i<n;i++) 601 if (!PetscBTLookup(btf,i)) 602 for (j=0;j<iiu[i+1]-iiu[i];j++) 603 jja[iia[i]+j] = jju[iiu[i]+j]; 604 605 /* map edge dofs connectivity */ 606 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 607 for (i=0;i<ne;i++) { 608 PetscInt e = idxs[i]; 609 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 610 } 611 612 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 613 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 614 if (rest) { 615 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 616 } 617 if (free) { 618 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 619 } 620 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 621 } else { 622 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 623 } 624 } 625 626 /* Analyze interface for edge dofs */ 627 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 628 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 629 630 /* Get coarse edges in the edge space */ 631 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 632 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 633 634 if (fl2g) { 635 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 636 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 637 for (i=0;i<nee;i++) { 638 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 639 } 640 } else { 641 eedges = alleedges; 642 primals = allprimals; 643 } 644 645 /* Mark fine edge dofs with their coarse edge id */ 646 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 647 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 648 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 649 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 650 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 651 if (print) { 652 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 653 ierr = ISView(primals,NULL);CHKERRQ(ierr); 654 } 655 656 maxsize = 0; 657 for (i=0;i<nee;i++) { 658 PetscInt size,mark = i+1; 659 660 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 661 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 662 for (j=0;j<size;j++) marks[idxs[j]] = mark; 663 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 664 maxsize = PetscMax(maxsize,size); 665 } 666 667 /* Find coarse edge endpoints */ 668 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 669 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 670 for (i=0;i<nee;i++) { 671 PetscInt mark = i+1,size; 672 673 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 674 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 675 if (print) { 676 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 677 ISView(eedges[i],NULL); 678 } 679 for (j=0;j<size;j++) { 680 PetscInt k, ee = idxs[j]; 681 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 682 for (k=ii[ee];k<ii[ee+1];k++) { 683 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 684 if (PetscBTLookup(btv,jj[k])) { 685 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 686 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 687 PetscInt k2; 688 PetscBool corner = PETSC_FALSE; 689 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 690 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 691 /* it's a corner if either is connected with an edge dof belonging to a different cc or 692 if the edge dof lie on the natural part of the boundary */ 693 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 694 corner = PETSC_TRUE; 695 break; 696 } 697 } 698 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 699 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 700 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 701 } else { 702 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 703 } 704 } 705 } 706 } 707 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 708 } 709 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 710 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 711 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 712 713 /* Reset marked primal dofs */ 714 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 715 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 716 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 717 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 718 719 /* Now use the initial lG */ 720 ierr = MatDestroy(&lG);CHKERRQ(ierr); 721 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 722 lG = lGinit; 723 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 724 725 /* Compute extended cols indices */ 726 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 727 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 728 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 729 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 730 i *= maxsize; 731 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 732 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 733 eerr = PETSC_FALSE; 734 for (i=0;i<nee;i++) { 735 PetscInt size,found = 0; 736 737 cum = 0; 738 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 739 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 740 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 741 for (j=0;j<size;j++) { 742 PetscInt k,ee = idxs[j]; 743 for (k=ii[ee];k<ii[ee+1];k++) { 744 PetscInt vv = jj[k]; 745 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 746 else if (!PetscBTLookupSet(btvc,vv)) found++; 747 } 748 } 749 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 750 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 751 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 752 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 753 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 754 /* it may happen that endpoints are not defined at this point 755 if it is the case, mark this edge for a second pass */ 756 if (cum != size -1 || found != 2) { 757 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 758 if (print) { 759 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 760 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 761 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 762 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 763 } 764 eerr = PETSC_TRUE; 765 } 766 } 767 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 768 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 769 if (done) { 770 PetscInt *newprimals; 771 772 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 773 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 774 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 775 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 776 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 777 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 778 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 779 for (i=0;i<nee;i++) { 780 PetscBool has_candidates = PETSC_FALSE; 781 if (PetscBTLookup(bter,i)) { 782 PetscInt size,mark = i+1; 783 784 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 785 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 786 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 787 for (j=0;j<size;j++) { 788 PetscInt k,ee = idxs[j]; 789 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 790 for (k=ii[ee];k<ii[ee+1];k++) { 791 /* set all candidates located on the edge as corners */ 792 if (PetscBTLookup(btvcand,jj[k])) { 793 PetscInt k2,vv = jj[k]; 794 has_candidates = PETSC_TRUE; 795 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 796 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 797 /* set all edge dofs connected to candidate as primals */ 798 for (k2=iit[vv];k2<iit[vv+1];k2++) { 799 if (marks[jjt[k2]] == mark) { 800 PetscInt k3,ee2 = jjt[k2]; 801 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 802 newprimals[cum++] = ee2; 803 /* finally set the new corners */ 804 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 805 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 806 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 807 } 808 } 809 } 810 } else { 811 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 812 } 813 } 814 } 815 if (!has_candidates) { /* circular edge */ 816 PetscInt k, ee = idxs[0],*tmarks; 817 818 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 819 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 820 for (k=ii[ee];k<ii[ee+1];k++) { 821 PetscInt k2; 822 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 823 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 824 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 825 } 826 for (j=0;j<size;j++) { 827 if (tmarks[idxs[j]] > 1) { 828 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 829 newprimals[cum++] = idxs[j]; 830 } 831 } 832 ierr = PetscFree(tmarks);CHKERRQ(ierr); 833 } 834 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 835 } 836 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 837 } 838 ierr = PetscFree(extcols);CHKERRQ(ierr); 839 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 840 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 841 if (fl2g) { 842 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 843 ierr = ISDestroy(&primals);CHKERRQ(ierr); 844 for (i=0;i<nee;i++) { 845 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 846 } 847 ierr = PetscFree(eedges);CHKERRQ(ierr); 848 } 849 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 850 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 851 ierr = PetscFree(newprimals);CHKERRQ(ierr); 852 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 853 ierr = ISDestroy(&primals);CHKERRQ(ierr); 854 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 855 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 856 if (fl2g) { 857 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 858 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 859 for (i=0;i<nee;i++) { 860 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 861 } 862 } else { 863 eedges = alleedges; 864 primals = allprimals; 865 } 866 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 867 868 /* Mark again */ 869 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 } 878 if (print) { 879 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 880 ierr = ISView(primals,NULL);CHKERRQ(ierr); 881 } 882 883 /* Recompute extended cols */ 884 eerr = PETSC_FALSE; 885 for (i=0;i<nee;i++) { 886 PetscInt size; 887 888 cum = 0; 889 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 890 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 891 for (j=0;j<size;j++) { 892 PetscInt k,ee = idxs[j]; 893 for (k=ii[ee];k<ii[ee+1];k++) { 894 if (!PetscBTLookup(btv,jj[k])) { 895 extrow[cum++] = jj[k]; 896 } 897 } 898 } 899 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 900 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 901 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 902 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 903 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 904 if (cum != size -1) { 905 if (print) { 906 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 907 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 908 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 909 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 910 } 911 eerr = PETSC_TRUE; 912 } 913 } 914 } 915 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 916 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 917 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 918 /* an error should not occur at this point */ 919 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 920 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 921 922 /* Check the number of endpoints */ 923 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 924 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 925 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 926 for (i=0;i<nee;i++) { 927 PetscInt size, found = 0, gc[2]; 928 929 /* init with defaults */ 930 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 931 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 932 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 933 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 934 for (j=0;j<size;j++) { 935 PetscInt k,ee = idxs[j]; 936 for (k=ii[ee];k<ii[ee+1];k++) { 937 PetscInt vv = jj[k]; 938 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 939 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 940 corners[i*2+found++] = vv; 941 } 942 } 943 } 944 if (found != 2) { 945 PetscInt e; 946 if (fl2g) { 947 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 948 } else { 949 e = idxs[0]; 950 } 951 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 952 } 953 /* WARNING : this depends on how pcbddc->primal_indices_local_idxs is filled up in PCBDDConstraintsSetUp */ 954 cedges[i] = idxs[0]; 955 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 956 if (gc[0] > gc[1]) { 957 PetscInt swap = corners[2*i]; 958 corners[2*i] = corners[2*i+1]; 959 corners[2*i+1] = swap; 960 } 961 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 962 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 963 } 964 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 965 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 966 967 #if defined(PETSC_USE_DEBUG) 968 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 969 not interfere with neighbouring coarse edges */ 970 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 971 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 972 for (i=0;i<nv;i++) { 973 PetscInt emax = 0,eemax = 0; 974 975 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 976 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 977 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 978 for (j=1;j<nee+1;j++) { 979 if (emax < emarks[j]) { 980 emax = emarks[j]; 981 eemax = j; 982 } 983 } 984 /* not relevant for edges */ 985 if (!eemax) continue; 986 987 for (j=ii[i];j<ii[i+1];j++) { 988 if (marks[jj[j]] && marks[jj[j]] != eemax) { 989 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 990 } 991 } 992 } 993 ierr = PetscFree(emarks);CHKERRQ(ierr); 994 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 995 #endif 996 997 /* Compute extended rows indices for edge blocks of the change of basis */ 998 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 999 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1000 extmem *= maxsize; 1001 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1002 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1003 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1004 for (i=0;i<nv;i++) { 1005 PetscInt mark = 0,size,start; 1006 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1007 for (j=ii[i];j<ii[i+1];j++) 1008 if (marks[jj[j]] && !mark) 1009 mark = marks[jj[j]]; 1010 1011 /* not relevant */ 1012 if (!mark) continue; 1013 1014 /* import extended row */ 1015 mark--; 1016 start = mark*extmem+extrowcum[mark]; 1017 size = ii[i+1]-ii[i]; 1018 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1019 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1020 extrowcum[mark] += size; 1021 } 1022 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1023 cum = 0; 1024 for (i=0;i<nee;i++) { 1025 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1026 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1027 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1028 cum = PetscMax(cum,size); 1029 } 1030 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1031 ierr = PetscFree(marks);CHKERRQ(ierr); 1032 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1033 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1034 1035 /* Workspace for lapack inner calls and VecSetValues */ 1036 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1037 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1038 for (i=0;i<maxsize;i++) vals[i] = 1.; 1039 1040 /* Create vectors for quadrature rules */ 1041 /* TODO preserve other quadratures */ 1042 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 1043 for (i=0;i<nquads;i++) { 1044 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 1045 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 1046 } 1047 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 1048 1049 /* Create change of basis matrix (preallocation can be improved) */ 1050 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1051 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1052 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1053 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1054 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1055 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1056 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 1057 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1058 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1059 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1060 1061 /* Defaults to identity */ 1062 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1063 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1064 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1065 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1066 1067 for (i=0;i<nee;i++) { 1068 Mat Gins = NULL, GKins = NULL; 1069 1070 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 1071 if (Gins && GKins) { 1072 PetscScalar *data; 1073 const PetscInt *rows,*cols; 1074 PetscInt nrh,nch,nrc,ncc; 1075 1076 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1077 /* H1 */ 1078 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1079 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1080 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1081 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1082 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1083 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1084 /* complement */ 1085 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1086 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 1087 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 1088 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1089 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1090 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1091 /* Gins kernel quadratures */ 1092 for (j=0;j<ncc;j++) { 1093 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1094 } 1095 /* H1 average */ 1096 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1097 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1098 } 1099 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1100 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1101 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1102 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1103 } 1104 1105 /* Start assembling */ 1106 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1107 for (i=0;i<nquads;i++) { 1108 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1109 } 1110 1111 /* Free */ 1112 if (fl2g) { 1113 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1114 for (i=0;i<nee;i++) { 1115 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1116 } 1117 ierr = PetscFree(eedges);CHKERRQ(ierr); 1118 } 1119 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1120 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1121 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1122 ierr = PetscFree(extrow);CHKERRQ(ierr); 1123 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1124 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1125 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1126 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1127 ierr = PetscFree(vals);CHKERRQ(ierr); 1128 ierr = PetscFree(corners);CHKERRQ(ierr); 1129 ierr = PetscFree(cedges);CHKERRQ(ierr); 1130 ierr = PetscFree(extrows);CHKERRQ(ierr); 1131 ierr = PetscFree(extcols);CHKERRQ(ierr); 1132 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1133 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1134 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1135 1136 /* Complete assembling */ 1137 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1138 for (i=0;i<nquads;i++) { 1139 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1140 } 1141 for (i=0;i<nquads;i++) { 1142 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1143 } 1144 ierr = PetscFree(quads);CHKERRQ(ierr); 1145 1146 /* set change of basis */ 1147 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1148 ierr = MatDestroy(&T);CHKERRQ(ierr); 1149 1150 /* set quadratures */ 1151 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1152 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1153 1154 PetscFunctionReturn(0); 1155 } 1156 1157 /* the near-null space of BDDC carries information on quadrature weights, 1158 and these can be collinear -> so cheat with MatNullSpaceCreate 1159 and create a suitable set of basis vectors first */ 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1162 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1163 { 1164 PetscErrorCode ierr; 1165 PetscInt i; 1166 1167 PetscFunctionBegin; 1168 for (i=0;i<nvecs;i++) { 1169 PetscInt first,last; 1170 1171 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1172 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1173 if (i>=first && i < last) { 1174 PetscScalar *data; 1175 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1176 if (!has_const) { 1177 data[i-first] = 1.; 1178 } else { 1179 data[2*i-first] = 1./PetscSqrtReal(2.); 1180 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1181 } 1182 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1183 } 1184 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1185 } 1186 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1187 for (i=0;i<nvecs;i++) { /* reset vectors */ 1188 PetscInt first,last; 1189 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1190 if (i>=first && i < last) { 1191 PetscScalar *data; 1192 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1193 if (!has_const) { 1194 data[i-first] = 0.; 1195 } else { 1196 data[2*i-first] = 0.; 1197 data[2*i-first+1] = 0.; 1198 } 1199 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1200 } 1201 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1202 } 1203 PetscFunctionReturn(0); 1204 } 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1208 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1209 { 1210 Mat loc_divudotp; 1211 Vec p,v,vins,quad_vec,*quad_vecs; 1212 ISLocalToGlobalMapping map; 1213 IS *faces,*edges; 1214 PetscScalar *vals; 1215 const PetscScalar *array; 1216 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1217 PetscMPIInt rank; 1218 PetscErrorCode ierr; 1219 1220 PetscFunctionBegin; 1221 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1222 if (graph->twodim) { 1223 lmaxneighs = 2; 1224 } else { 1225 lmaxneighs = 1; 1226 for (i=0;i<ne;i++) { 1227 const PetscInt *idxs; 1228 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1229 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1230 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1231 } 1232 lmaxneighs++; /* graph count does not include self */ 1233 } 1234 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1235 maxsize = 0; 1236 for (i=0;i<ne;i++) { 1237 PetscInt nn; 1238 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1239 maxsize = PetscMax(maxsize,nn); 1240 } 1241 for (i=0;i<nf;i++) { 1242 PetscInt nn; 1243 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1244 maxsize = PetscMax(maxsize,nn); 1245 } 1246 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1247 /* create vectors to hold quadrature weights */ 1248 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1249 if (!transpose) { 1250 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1251 } else { 1252 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1253 } 1254 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1255 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1256 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1257 for (i=0;i<maxneighs;i++) { 1258 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1259 } 1260 1261 /* compute local quad vec */ 1262 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1263 if (!transpose) { 1264 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1265 } else { 1266 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1267 } 1268 ierr = VecSet(p,1.);CHKERRQ(ierr); 1269 if (!transpose) { 1270 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1271 } else { 1272 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1273 } 1274 if (vl2l) { 1275 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1276 } else { 1277 vins = v; 1278 } 1279 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1280 ierr = VecDestroy(&p);CHKERRQ(ierr); 1281 1282 /* insert in global quadrature vecs */ 1283 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1284 for (i=0;i<nf;i++) { 1285 const PetscInt *idxs; 1286 PetscInt idx,nn,j; 1287 1288 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1289 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1290 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1291 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1292 idx = -(idx+1); 1293 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1294 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1295 } 1296 for (i=0;i<ne;i++) { 1297 const PetscInt *idxs; 1298 PetscInt idx,nn,j; 1299 1300 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1301 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1302 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1303 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1304 idx = -(idx+1); 1305 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1306 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1307 } 1308 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1309 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1310 if (vl2l) { 1311 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1312 } 1313 ierr = VecDestroy(&v);CHKERRQ(ierr); 1314 ierr = PetscFree(vals);CHKERRQ(ierr); 1315 1316 /* assemble near null space */ 1317 for (i=0;i<maxneighs;i++) { 1318 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1319 } 1320 for (i=0;i<maxneighs;i++) { 1321 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1322 } 1323 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1330 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1331 { 1332 PetscErrorCode ierr; 1333 Vec local,global; 1334 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1335 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1336 1337 PetscFunctionBegin; 1338 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1339 /* need to convert from global to local topology information and remove references to information in global ordering */ 1340 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1341 if (pcbddc->user_provided_isfordofs) { 1342 if (pcbddc->n_ISForDofs) { 1343 PetscInt i; 1344 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1345 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1346 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1347 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1348 } 1349 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1350 pcbddc->n_ISForDofs = 0; 1351 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1352 } 1353 } else { 1354 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1355 PetscInt i, n = matis->A->rmap->n; 1356 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1357 if (i > 1) { 1358 pcbddc->n_ISForDofsLocal = i; 1359 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1360 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1361 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1362 } 1363 } 1364 } 1365 } 1366 1367 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1368 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1369 } 1370 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1371 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1372 } 1373 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1374 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1375 } 1376 ierr = VecDestroy(&global);CHKERRQ(ierr); 1377 ierr = VecDestroy(&local);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1383 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1384 { 1385 PC_IS *pcis = (PC_IS*)(pc->data); 1386 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1387 PetscErrorCode ierr; 1388 1389 PetscFunctionBegin; 1390 if (!pcbddc->benign_have_null) { 1391 PetscFunctionReturn(0); 1392 } 1393 if (pcbddc->ChangeOfBasisMatrix) { 1394 Vec swap; 1395 1396 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1397 swap = pcbddc->work_change; 1398 pcbddc->work_change = r; 1399 r = swap; 1400 } 1401 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1402 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1403 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1404 ierr = VecSet(z,0.);CHKERRQ(ierr); 1405 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1406 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1407 if (pcbddc->ChangeOfBasisMatrix) { 1408 pcbddc->work_change = r; 1409 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1410 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1411 } 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1417 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1418 { 1419 PCBDDCBenignMatMult_ctx ctx; 1420 PetscErrorCode ierr; 1421 PetscBool apply_right,apply_left,reset_x; 1422 1423 PetscFunctionBegin; 1424 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1425 if (transpose) { 1426 apply_right = ctx->apply_left; 1427 apply_left = ctx->apply_right; 1428 } else { 1429 apply_right = ctx->apply_right; 1430 apply_left = ctx->apply_left; 1431 } 1432 reset_x = PETSC_FALSE; 1433 if (apply_right) { 1434 const PetscScalar *ax; 1435 PetscInt nl,i; 1436 1437 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1438 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1439 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1440 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1441 for (i=0;i<ctx->benign_n;i++) { 1442 PetscScalar sum,val; 1443 const PetscInt *idxs; 1444 PetscInt nz,j; 1445 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1446 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1447 sum = 0.; 1448 if (ctx->apply_p0) { 1449 val = ctx->work[idxs[nz-1]]; 1450 for (j=0;j<nz-1;j++) { 1451 sum += ctx->work[idxs[j]]; 1452 ctx->work[idxs[j]] += val; 1453 } 1454 } else { 1455 for (j=0;j<nz-1;j++) { 1456 sum += ctx->work[idxs[j]]; 1457 } 1458 } 1459 ctx->work[idxs[nz-1]] -= sum; 1460 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1461 } 1462 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1463 reset_x = PETSC_TRUE; 1464 } 1465 if (transpose) { 1466 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1467 } else { 1468 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1469 } 1470 if (reset_x) { 1471 ierr = VecResetArray(x);CHKERRQ(ierr); 1472 } 1473 if (apply_left) { 1474 PetscScalar *ay; 1475 PetscInt i; 1476 1477 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1478 for (i=0;i<ctx->benign_n;i++) { 1479 PetscScalar sum,val; 1480 const PetscInt *idxs; 1481 PetscInt nz,j; 1482 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1483 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1484 val = -ay[idxs[nz-1]]; 1485 if (ctx->apply_p0) { 1486 sum = 0.; 1487 for (j=0;j<nz-1;j++) { 1488 sum += ay[idxs[j]]; 1489 ay[idxs[j]] += val; 1490 } 1491 ay[idxs[nz-1]] += sum; 1492 } else { 1493 for (j=0;j<nz-1;j++) { 1494 ay[idxs[j]] += val; 1495 } 1496 ay[idxs[nz-1]] = 0.; 1497 } 1498 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1499 } 1500 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1507 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1508 { 1509 PetscErrorCode ierr; 1510 1511 PetscFunctionBegin; 1512 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1518 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1519 { 1520 PetscErrorCode ierr; 1521 1522 PetscFunctionBegin; 1523 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "PCBDDCBenignShellMat" 1529 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1530 { 1531 PC_IS *pcis = (PC_IS*)pc->data; 1532 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1533 PCBDDCBenignMatMult_ctx ctx; 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 if (!restore) { 1538 Mat A_IB,A_BI; 1539 PetscScalar *work; 1540 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1541 1542 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1543 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1544 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1545 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1546 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1547 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1548 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1549 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1550 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1551 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1552 ctx->apply_left = PETSC_TRUE; 1553 ctx->apply_right = PETSC_FALSE; 1554 ctx->apply_p0 = PETSC_FALSE; 1555 ctx->benign_n = pcbddc->benign_n; 1556 if (reuse) { 1557 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1558 ctx->free = PETSC_FALSE; 1559 } else { /* TODO: could be optimized for successive solves */ 1560 ISLocalToGlobalMapping N_to_D; 1561 PetscInt i; 1562 1563 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1564 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1565 for (i=0;i<pcbddc->benign_n;i++) { 1566 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1567 } 1568 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1569 ctx->free = PETSC_TRUE; 1570 } 1571 ctx->A = pcis->A_IB; 1572 ctx->work = work; 1573 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1574 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1575 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1576 pcis->A_IB = A_IB; 1577 1578 /* A_BI as A_IB^T */ 1579 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1580 pcbddc->benign_original_mat = pcis->A_BI; 1581 pcis->A_BI = A_BI; 1582 } else { 1583 if (!pcbddc->benign_original_mat) { 1584 PetscFunctionReturn(0); 1585 } 1586 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1587 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1588 pcis->A_IB = ctx->A; 1589 ctx->A = NULL; 1590 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1591 pcis->A_BI = pcbddc->benign_original_mat; 1592 pcbddc->benign_original_mat = NULL; 1593 if (ctx->free) { 1594 PetscInt i; 1595 for (i=0;i<ctx->benign_n;i++) { 1596 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1597 } 1598 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1599 } 1600 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1601 ierr = PetscFree(ctx);CHKERRQ(ierr); 1602 } 1603 PetscFunctionReturn(0); 1604 } 1605 1606 /* used just in bddc debug mode */ 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "PCBDDCBenignProject" 1609 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1610 { 1611 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1612 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1613 Mat An; 1614 PetscErrorCode ierr; 1615 1616 PetscFunctionBegin; 1617 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1618 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1619 if (is1) { 1620 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1621 ierr = MatDestroy(&An);CHKERRQ(ierr); 1622 } else { 1623 *B = An; 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 /* TODO: add reuse flag */ 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatSeqAIJCompress" 1631 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1632 { 1633 Mat Bt; 1634 PetscScalar *a,*bdata; 1635 const PetscInt *ii,*ij; 1636 PetscInt m,n,i,nnz,*bii,*bij; 1637 PetscBool flg_row; 1638 PetscErrorCode ierr; 1639 1640 PetscFunctionBegin; 1641 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1642 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1643 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1644 nnz = n; 1645 for (i=0;i<ii[n];i++) { 1646 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1647 } 1648 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1649 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1650 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1651 nnz = 0; 1652 bii[0] = 0; 1653 for (i=0;i<n;i++) { 1654 PetscInt j; 1655 for (j=ii[i];j<ii[i+1];j++) { 1656 PetscScalar entry = a[j]; 1657 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1658 bij[nnz] = ij[j]; 1659 bdata[nnz] = entry; 1660 nnz++; 1661 } 1662 } 1663 bii[i+1] = nnz; 1664 } 1665 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1666 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1667 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1668 { 1669 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1670 b->free_a = PETSC_TRUE; 1671 b->free_ij = PETSC_TRUE; 1672 } 1673 *B = Bt; 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatDetectDisconnectedComponents" 1679 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1680 { 1681 Mat B; 1682 IS is_dummy,*cc_n; 1683 ISLocalToGlobalMapping l2gmap_dummy; 1684 PCBDDCGraph graph; 1685 PetscInt i,n; 1686 PetscInt *xadj,*adjncy; 1687 PetscInt *xadj_filtered,*adjncy_filtered; 1688 PetscBool flg_row,isseqaij; 1689 PetscErrorCode ierr; 1690 1691 PetscFunctionBegin; 1692 if (!A->rmap->N || !A->cmap->N) { 1693 *ncc = 0; 1694 *cc = NULL; 1695 PetscFunctionReturn(0); 1696 } 1697 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1698 if (!isseqaij && filter) { 1699 PetscBool isseqdense; 1700 1701 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1702 if (!isseqdense) { 1703 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1704 } else { /* TODO: rectangular case and LDA */ 1705 PetscScalar *array; 1706 PetscReal chop=1.e-6; 1707 1708 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1709 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1710 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1711 for (i=0;i<n;i++) { 1712 PetscInt j; 1713 for (j=i+1;j<n;j++) { 1714 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1715 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1716 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1717 } 1718 } 1719 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1720 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1721 } 1722 } else { 1723 B = A; 1724 } 1725 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1726 1727 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1728 if (filter) { 1729 PetscScalar *data; 1730 PetscInt j,cum; 1731 1732 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1733 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1734 cum = 0; 1735 for (i=0;i<n;i++) { 1736 PetscInt t; 1737 1738 for (j=xadj[i];j<xadj[i+1];j++) { 1739 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1740 continue; 1741 } 1742 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1743 } 1744 t = xadj_filtered[i]; 1745 xadj_filtered[i] = cum; 1746 cum += t; 1747 } 1748 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1749 } else { 1750 xadj_filtered = NULL; 1751 adjncy_filtered = NULL; 1752 } 1753 1754 /* compute local connected components using PCBDDCGraph */ 1755 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1756 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1757 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1758 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1759 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1760 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1761 if (xadj_filtered) { 1762 graph->xadj = xadj_filtered; 1763 graph->adjncy = adjncy_filtered; 1764 } else { 1765 graph->xadj = xadj; 1766 graph->adjncy = adjncy; 1767 } 1768 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1769 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1770 /* partial clean up */ 1771 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1772 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1773 if (A != B) { 1774 ierr = MatDestroy(&B);CHKERRQ(ierr); 1775 } 1776 1777 /* get back data */ 1778 if (ncc) *ncc = graph->ncc; 1779 if (cc) { 1780 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1781 for (i=0;i<graph->ncc;i++) { 1782 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1783 } 1784 *cc = cc_n; 1785 } 1786 /* clean up graph */ 1787 graph->xadj = 0; 1788 graph->adjncy = 0; 1789 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "PCBDDCBenignCheck" 1795 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1796 { 1797 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1798 PC_IS* pcis = (PC_IS*)(pc->data); 1799 IS dirIS = NULL; 1800 PetscInt i; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1805 if (zerodiag) { 1806 Mat A; 1807 Vec vec3_N; 1808 PetscScalar *vals; 1809 const PetscInt *idxs; 1810 PetscInt nz,*count; 1811 1812 /* p0 */ 1813 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1814 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1815 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1816 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1817 for (i=0;i<nz;i++) vals[i] = 1.; 1818 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1819 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1820 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1821 /* v_I */ 1822 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1823 for (i=0;i<nz;i++) vals[i] = 0.; 1824 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1825 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1826 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1827 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1828 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1829 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1830 if (dirIS) { 1831 PetscInt n; 1832 1833 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1834 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1835 for (i=0;i<n;i++) vals[i] = 0.; 1836 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1837 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1838 } 1839 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1840 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1841 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1842 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1843 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1844 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1845 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1846 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1847 ierr = PetscFree(vals);CHKERRQ(ierr); 1848 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1849 1850 /* there should not be any pressure dofs lying on the interface */ 1851 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1852 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1853 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1854 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1855 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1856 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1857 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1858 ierr = PetscFree(count);CHKERRQ(ierr); 1859 } 1860 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1861 1862 /* check PCBDDCBenignGetOrSetP0 */ 1863 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1864 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1865 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1866 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1867 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1868 for (i=0;i<pcbddc->benign_n;i++) { 1869 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1870 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1871 } 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1877 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1878 { 1879 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1880 IS pressures,zerodiag,*zerodiag_subs; 1881 PetscInt nz,n; 1882 PetscInt *interior_dofs,n_interior_dofs; 1883 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1884 PetscErrorCode ierr; 1885 1886 PetscFunctionBegin; 1887 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1888 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1889 for (n=0;n<pcbddc->benign_n;n++) { 1890 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1891 } 1892 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1893 pcbddc->benign_n = 0; 1894 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1895 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1896 Checks if all the pressure dofs in each subdomain have a zero diagonal 1897 If not, a change of basis on pressures is not needed 1898 since the local Schur complements are already SPD 1899 */ 1900 has_null_pressures = PETSC_TRUE; 1901 have_null = PETSC_TRUE; 1902 if (pcbddc->n_ISForDofsLocal) { 1903 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1904 1905 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1906 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1907 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1908 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1909 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1910 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1911 if (!sorted) { 1912 ierr = ISSort(pressures);CHKERRQ(ierr); 1913 } 1914 } else { 1915 pressures = NULL; 1916 } 1917 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1918 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1919 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1920 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1921 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1922 if (!sorted) { 1923 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1924 } 1925 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1926 if (!nz) { 1927 if (n) have_null = PETSC_FALSE; 1928 has_null_pressures = PETSC_FALSE; 1929 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1930 } 1931 recompute_zerodiag = PETSC_FALSE; 1932 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1933 zerodiag_subs = NULL; 1934 pcbddc->benign_n = 0; 1935 n_interior_dofs = 0; 1936 interior_dofs = NULL; 1937 if (pcbddc->current_level) { /* need to compute interior nodes */ 1938 PetscInt n,i,j; 1939 PetscInt n_neigh,*neigh,*n_shared,**shared; 1940 PetscInt *iwork; 1941 1942 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1943 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1944 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1945 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1946 for (i=1;i<n_neigh;i++) 1947 for (j=0;j<n_shared[i];j++) 1948 iwork[shared[i][j]] += 1; 1949 for (i=0;i<n;i++) 1950 if (!iwork[i]) 1951 interior_dofs[n_interior_dofs++] = i; 1952 ierr = PetscFree(iwork);CHKERRQ(ierr); 1953 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1954 } 1955 if (has_null_pressures) { 1956 IS *subs; 1957 PetscInt nsubs,i,j,nl; 1958 const PetscInt *idxs; 1959 PetscScalar *array; 1960 Vec *work; 1961 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1962 1963 subs = pcbddc->local_subs; 1964 nsubs = pcbddc->n_local_subs; 1965 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1966 if (pcbddc->current_level) { 1967 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1968 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1969 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1970 /* work[0] = 1_p */ 1971 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1972 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1973 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1974 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1975 /* work[0] = 1_v */ 1976 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1977 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1978 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1979 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1980 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1981 } 1982 if (nsubs > 1) { 1983 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1984 for (i=0;i<nsubs;i++) { 1985 ISLocalToGlobalMapping l2g; 1986 IS t_zerodiag_subs; 1987 PetscInt nl; 1988 1989 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1990 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1991 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1992 if (nl) { 1993 PetscBool valid = PETSC_TRUE; 1994 1995 if (pcbddc->current_level) { 1996 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1997 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1998 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1999 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2000 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2001 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2002 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2003 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2004 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2005 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2006 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2007 for (j=0;j<n_interior_dofs;j++) { 2008 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2009 valid = PETSC_FALSE; 2010 break; 2011 } 2012 } 2013 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2014 } 2015 if (valid && pcbddc->NeumannBoundariesLocal) { 2016 IS t_bc; 2017 PetscInt nzb; 2018 2019 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 2020 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 2021 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 2022 if (nzb) valid = PETSC_FALSE; 2023 } 2024 if (valid && pressures) { 2025 IS t_pressure_subs; 2026 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2027 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2028 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2029 } 2030 if (valid) { 2031 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2032 pcbddc->benign_n++; 2033 } else { 2034 recompute_zerodiag = PETSC_TRUE; 2035 } 2036 } 2037 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2038 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2039 } 2040 } else { /* there's just one subdomain (or zero if they have not been detected */ 2041 PetscBool valid = PETSC_TRUE; 2042 2043 if (pcbddc->NeumannBoundariesLocal) { 2044 PetscInt nzb; 2045 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 2046 if (nzb) valid = PETSC_FALSE; 2047 } 2048 if (valid && pressures) { 2049 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2050 } 2051 if (valid && pcbddc->current_level) { 2052 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2053 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2054 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2055 for (j=0;j<n_interior_dofs;j++) { 2056 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2057 valid = PETSC_FALSE; 2058 break; 2059 } 2060 } 2061 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2062 } 2063 if (valid) { 2064 pcbddc->benign_n = 1; 2065 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2066 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2067 zerodiag_subs[0] = zerodiag; 2068 } 2069 } 2070 if (pcbddc->current_level) { 2071 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2072 } 2073 } 2074 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2075 2076 if (!pcbddc->benign_n) { 2077 PetscInt n; 2078 2079 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2080 recompute_zerodiag = PETSC_FALSE; 2081 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2082 if (n) { 2083 has_null_pressures = PETSC_FALSE; 2084 have_null = PETSC_FALSE; 2085 } 2086 } 2087 2088 /* final check for null pressures */ 2089 if (zerodiag && pressures) { 2090 PetscInt nz,np; 2091 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2092 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2093 if (nz != np) have_null = PETSC_FALSE; 2094 } 2095 2096 if (recompute_zerodiag) { 2097 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2098 if (pcbddc->benign_n == 1) { 2099 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2100 zerodiag = zerodiag_subs[0]; 2101 } else { 2102 PetscInt i,nzn,*new_idxs; 2103 2104 nzn = 0; 2105 for (i=0;i<pcbddc->benign_n;i++) { 2106 PetscInt ns; 2107 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2108 nzn += ns; 2109 } 2110 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2111 nzn = 0; 2112 for (i=0;i<pcbddc->benign_n;i++) { 2113 PetscInt ns,*idxs; 2114 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2115 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2116 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2117 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2118 nzn += ns; 2119 } 2120 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2121 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2122 } 2123 have_null = PETSC_FALSE; 2124 } 2125 2126 /* Prepare matrix to compute no-net-flux */ 2127 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2128 Mat A,loc_divudotp; 2129 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2130 IS row,col,isused = NULL; 2131 PetscInt M,N,n,st,n_isused; 2132 2133 if (pressures) { 2134 isused = pressures; 2135 } else { 2136 isused = zerodiag; 2137 } 2138 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2139 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2140 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2141 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2142 n_isused = 0; 2143 if (isused) { 2144 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2145 } 2146 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2147 st = st-n_isused; 2148 if (n) { 2149 const PetscInt *gidxs; 2150 2151 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2152 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2153 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2154 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2155 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2156 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2157 } else { 2158 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2159 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2160 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2161 } 2162 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2163 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2164 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2165 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2166 ierr = ISDestroy(&row);CHKERRQ(ierr); 2167 ierr = ISDestroy(&col);CHKERRQ(ierr); 2168 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2169 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2170 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2171 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2172 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2173 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2174 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2175 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2176 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 } 2179 2180 /* change of basis and p0 dofs */ 2181 if (has_null_pressures) { 2182 IS zerodiagc; 2183 const PetscInt *idxs,*idxsc; 2184 PetscInt i,s,*nnz; 2185 2186 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2187 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2188 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2189 /* local change of basis for pressures */ 2190 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2191 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2192 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2193 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2194 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2195 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2196 for (i=0;i<pcbddc->benign_n;i++) { 2197 PetscInt nzs,j; 2198 2199 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2200 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2201 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2202 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2203 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2204 } 2205 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2206 ierr = PetscFree(nnz);CHKERRQ(ierr); 2207 /* set identity on velocities */ 2208 for (i=0;i<n-nz;i++) { 2209 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2210 } 2211 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2212 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2213 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2214 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2215 /* set change on pressures */ 2216 for (s=0;s<pcbddc->benign_n;s++) { 2217 PetscScalar *array; 2218 PetscInt nzs; 2219 2220 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2221 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2222 for (i=0;i<nzs-1;i++) { 2223 PetscScalar vals[2]; 2224 PetscInt cols[2]; 2225 2226 cols[0] = idxs[i]; 2227 cols[1] = idxs[nzs-1]; 2228 vals[0] = 1.; 2229 vals[1] = 1.; 2230 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2231 } 2232 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2233 for (i=0;i<nzs-1;i++) array[i] = -1.; 2234 array[nzs-1] = 1.; 2235 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2236 /* store local idxs for p0 */ 2237 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2238 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2239 ierr = PetscFree(array);CHKERRQ(ierr); 2240 } 2241 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2242 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2243 /* project if needed */ 2244 if (pcbddc->benign_change_explicit) { 2245 Mat M; 2246 2247 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2248 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2249 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2250 ierr = MatDestroy(&M);CHKERRQ(ierr); 2251 } 2252 /* store global idxs for p0 */ 2253 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2254 } 2255 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2256 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2257 2258 /* determines if the coarse solver will be singular or not */ 2259 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2260 /* determines if the problem has subdomains with 0 pressure block */ 2261 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2262 *zerodiaglocal = zerodiag; 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2268 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2269 { 2270 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2271 PetscScalar *array; 2272 PetscErrorCode ierr; 2273 2274 PetscFunctionBegin; 2275 if (!pcbddc->benign_sf) { 2276 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2277 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2278 } 2279 if (get) { 2280 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2281 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2282 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2283 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2284 } else { 2285 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2286 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2287 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2288 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2295 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2296 { 2297 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 /* TODO: add error checking 2302 - avoid nested pop (or push) calls. 2303 - cannot push before pop. 2304 - cannot call this if pcbddc->local_mat is NULL 2305 */ 2306 if (!pcbddc->benign_n) { 2307 PetscFunctionReturn(0); 2308 } 2309 if (pop) { 2310 if (pcbddc->benign_change_explicit) { 2311 IS is_p0; 2312 MatReuse reuse; 2313 2314 /* extract B_0 */ 2315 reuse = MAT_INITIAL_MATRIX; 2316 if (pcbddc->benign_B0) { 2317 reuse = MAT_REUSE_MATRIX; 2318 } 2319 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2320 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2321 /* remove rows and cols from local problem */ 2322 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2323 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2324 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2325 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2326 } else { 2327 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2328 PetscScalar *vals; 2329 PetscInt i,n,*idxs_ins; 2330 2331 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2332 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2333 if (!pcbddc->benign_B0) { 2334 PetscInt *nnz; 2335 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2336 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2337 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2338 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2339 for (i=0;i<pcbddc->benign_n;i++) { 2340 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2341 nnz[i] = n - nnz[i]; 2342 } 2343 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2344 ierr = PetscFree(nnz);CHKERRQ(ierr); 2345 } 2346 2347 for (i=0;i<pcbddc->benign_n;i++) { 2348 PetscScalar *array; 2349 PetscInt *idxs,j,nz,cum; 2350 2351 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2352 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2353 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2354 for (j=0;j<nz;j++) vals[j] = 1.; 2355 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2356 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2357 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2358 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2359 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2360 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2361 cum = 0; 2362 for (j=0;j<n;j++) { 2363 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2364 vals[cum] = array[j]; 2365 idxs_ins[cum] = j; 2366 cum++; 2367 } 2368 } 2369 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2370 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2371 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2372 } 2373 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2374 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2375 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2376 } 2377 } else { /* push */ 2378 if (pcbddc->benign_change_explicit) { 2379 PetscInt i; 2380 2381 for (i=0;i<pcbddc->benign_n;i++) { 2382 PetscScalar *B0_vals; 2383 PetscInt *B0_cols,B0_ncol; 2384 2385 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2386 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2387 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2388 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2389 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2390 } 2391 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2392 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 } else { 2394 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2395 } 2396 } 2397 PetscFunctionReturn(0); 2398 } 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2402 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2403 { 2404 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2405 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2406 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2407 PetscBLASInt *B_iwork,*B_ifail; 2408 PetscScalar *work,lwork; 2409 PetscScalar *St,*S,*eigv; 2410 PetscScalar *Sarray,*Starray; 2411 PetscReal *eigs,thresh; 2412 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2413 PetscBool allocated_S_St; 2414 #if defined(PETSC_USE_COMPLEX) 2415 PetscReal *rwork; 2416 #endif 2417 PetscErrorCode ierr; 2418 2419 PetscFunctionBegin; 2420 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2421 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2422 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2423 2424 if (pcbddc->dbg_flag) { 2425 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2426 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2427 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2428 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2429 } 2430 2431 if (pcbddc->dbg_flag) { 2432 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2433 } 2434 2435 /* max size of subsets */ 2436 mss = 0; 2437 for (i=0;i<sub_schurs->n_subs;i++) { 2438 PetscInt subset_size; 2439 2440 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2441 mss = PetscMax(mss,subset_size); 2442 } 2443 2444 /* min/max and threshold */ 2445 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2446 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2447 nmax = PetscMax(nmin,nmax); 2448 allocated_S_St = PETSC_FALSE; 2449 if (nmin) { 2450 allocated_S_St = PETSC_TRUE; 2451 } 2452 2453 /* allocate lapack workspace */ 2454 cum = cum2 = 0; 2455 maxneigs = 0; 2456 for (i=0;i<sub_schurs->n_subs;i++) { 2457 PetscInt n,subset_size; 2458 2459 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2460 n = PetscMin(subset_size,nmax); 2461 cum += subset_size; 2462 cum2 += subset_size*n; 2463 maxneigs = PetscMax(maxneigs,n); 2464 } 2465 if (mss) { 2466 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2467 PetscBLASInt B_itype = 1; 2468 PetscBLASInt B_N = mss; 2469 PetscReal zero = 0.0; 2470 PetscReal eps = 0.0; /* dlamch? */ 2471 2472 B_lwork = -1; 2473 S = NULL; 2474 St = NULL; 2475 eigs = NULL; 2476 eigv = NULL; 2477 B_iwork = NULL; 2478 B_ifail = NULL; 2479 #if defined(PETSC_USE_COMPLEX) 2480 rwork = NULL; 2481 #endif 2482 thresh = 1.0; 2483 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2484 #if defined(PETSC_USE_COMPLEX) 2485 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2486 #else 2487 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2488 #endif 2489 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2490 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2491 } else { 2492 /* TODO */ 2493 } 2494 } else { 2495 lwork = 0; 2496 } 2497 2498 nv = 0; 2499 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2500 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2501 } 2502 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2503 if (allocated_S_St) { 2504 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2505 } 2506 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2507 #if defined(PETSC_USE_COMPLEX) 2508 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2509 #endif 2510 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2511 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2512 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2513 nv+cum,&pcbddc->adaptive_constraints_idxs, 2514 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2515 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2516 2517 maxneigs = 0; 2518 cum = cumarray = 0; 2519 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2520 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2521 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2522 const PetscInt *idxs; 2523 2524 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2525 for (cum=0;cum<nv;cum++) { 2526 pcbddc->adaptive_constraints_n[cum] = 1; 2527 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2528 pcbddc->adaptive_constraints_data[cum] = 1.0; 2529 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2530 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2531 } 2532 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2533 } 2534 2535 if (mss) { /* multilevel */ 2536 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2537 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2538 } 2539 2540 thresh = pcbddc->adaptive_threshold; 2541 for (i=0;i<sub_schurs->n_subs;i++) { 2542 const PetscInt *idxs; 2543 PetscReal upper,lower; 2544 PetscInt j,subset_size,eigs_start = 0; 2545 PetscBLASInt B_N; 2546 PetscBool same_data = PETSC_FALSE; 2547 2548 if (pcbddc->use_deluxe_scaling) { 2549 upper = PETSC_MAX_REAL; 2550 lower = thresh; 2551 } else { 2552 upper = 1./thresh; 2553 lower = 0.; 2554 } 2555 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2556 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2557 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2558 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2559 if (sub_schurs->is_hermitian) { 2560 PetscInt j,k; 2561 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2562 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2563 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2564 } 2565 for (j=0;j<subset_size;j++) { 2566 for (k=j;k<subset_size;k++) { 2567 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2568 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2569 } 2570 } 2571 } else { 2572 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2573 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2574 } 2575 } else { 2576 S = Sarray + cumarray; 2577 St = Starray + cumarray; 2578 } 2579 /* see if we can save some work */ 2580 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2581 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2582 } 2583 2584 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2585 B_neigs = 0; 2586 } else { 2587 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2588 PetscBLASInt B_itype = 1; 2589 PetscBLASInt B_IL, B_IU; 2590 PetscReal eps = -1.0; /* dlamch? */ 2591 PetscInt nmin_s; 2592 PetscBool compute_range = PETSC_FALSE; 2593 2594 if (pcbddc->dbg_flag) { 2595 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2596 } 2597 2598 compute_range = PETSC_FALSE; 2599 if (thresh > 1.+PETSC_SMALL && !same_data) { 2600 compute_range = PETSC_TRUE; 2601 } 2602 2603 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2604 if (compute_range) { 2605 2606 /* ask for eigenvalues larger than thresh */ 2607 #if defined(PETSC_USE_COMPLEX) 2608 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2609 #else 2610 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2611 #endif 2612 } else if (!same_data) { 2613 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2614 B_IL = 1; 2615 #if defined(PETSC_USE_COMPLEX) 2616 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2617 #else 2618 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2619 #endif 2620 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2621 PetscInt k; 2622 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2623 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2624 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2625 nmin = nmax; 2626 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2627 for (k=0;k<nmax;k++) { 2628 eigs[k] = 1./PETSC_SMALL; 2629 eigv[k*(subset_size+1)] = 1.0; 2630 } 2631 } 2632 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2633 if (B_ierr) { 2634 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2635 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2636 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2637 } 2638 2639 if (B_neigs > nmax) { 2640 if (pcbddc->dbg_flag) { 2641 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2642 } 2643 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2644 B_neigs = nmax; 2645 } 2646 2647 nmin_s = PetscMin(nmin,B_N); 2648 if (B_neigs < nmin_s) { 2649 PetscBLASInt B_neigs2; 2650 2651 if (pcbddc->use_deluxe_scaling) { 2652 B_IL = B_N - nmin_s + 1; 2653 B_IU = B_N - B_neigs; 2654 } else { 2655 B_IL = B_neigs + 1; 2656 B_IU = nmin_s; 2657 } 2658 if (pcbddc->dbg_flag) { 2659 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2660 } 2661 if (sub_schurs->is_hermitian) { 2662 PetscInt j,k; 2663 for (j=0;j<subset_size;j++) { 2664 for (k=j;k<subset_size;k++) { 2665 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2666 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2667 } 2668 } 2669 } else { 2670 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2671 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2672 } 2673 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2674 #if defined(PETSC_USE_COMPLEX) 2675 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2676 #else 2677 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2678 #endif 2679 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2680 B_neigs += B_neigs2; 2681 } 2682 if (B_ierr) { 2683 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2684 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2685 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2686 } 2687 if (pcbddc->dbg_flag) { 2688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2689 for (j=0;j<B_neigs;j++) { 2690 if (eigs[j] == 0.0) { 2691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2692 } else { 2693 if (pcbddc->use_deluxe_scaling) { 2694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2695 } else { 2696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2697 } 2698 } 2699 } 2700 } 2701 } else { 2702 /* TODO */ 2703 } 2704 } 2705 /* change the basis back to the original one */ 2706 if (sub_schurs->change) { 2707 Mat change,phi,phit; 2708 2709 if (pcbddc->dbg_flag > 1) { 2710 PetscInt ii; 2711 for (ii=0;ii<B_neigs;ii++) { 2712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2713 for (j=0;j<B_N;j++) { 2714 #if defined(PETSC_USE_COMPLEX) 2715 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2716 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2717 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2718 #else 2719 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2720 #endif 2721 } 2722 } 2723 } 2724 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2726 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2727 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2728 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2729 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2730 } 2731 maxneigs = PetscMax(B_neigs,maxneigs); 2732 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2733 if (B_neigs) { 2734 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2735 2736 if (pcbddc->dbg_flag > 1) { 2737 PetscInt ii; 2738 for (ii=0;ii<B_neigs;ii++) { 2739 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2740 for (j=0;j<B_N;j++) { 2741 #if defined(PETSC_USE_COMPLEX) 2742 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2743 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2744 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2745 #else 2746 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2747 #endif 2748 } 2749 } 2750 } 2751 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2752 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2753 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2754 cum++; 2755 } 2756 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2757 /* shift for next computation */ 2758 cumarray += subset_size*subset_size; 2759 } 2760 if (pcbddc->dbg_flag) { 2761 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2762 } 2763 2764 if (mss) { 2765 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2766 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2767 /* destroy matrices (junk) */ 2768 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2769 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2770 } 2771 if (allocated_S_St) { 2772 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2773 } 2774 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2775 #if defined(PETSC_USE_COMPLEX) 2776 ierr = PetscFree(rwork);CHKERRQ(ierr); 2777 #endif 2778 if (pcbddc->dbg_flag) { 2779 PetscInt maxneigs_r; 2780 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2781 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2782 } 2783 PetscFunctionReturn(0); 2784 } 2785 2786 #undef __FUNCT__ 2787 #define __FUNCT__ "PCBDDCSetUpSolvers" 2788 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2789 { 2790 PetscScalar *coarse_submat_vals; 2791 PetscErrorCode ierr; 2792 2793 PetscFunctionBegin; 2794 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2795 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2796 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2797 2798 /* Setup local neumann solver ksp_R */ 2799 /* PCBDDCSetUpLocalScatters should be called first! */ 2800 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2801 2802 /* 2803 Setup local correction and local part of coarse basis. 2804 Gives back the dense local part of the coarse matrix in column major ordering 2805 */ 2806 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2807 2808 /* Compute total number of coarse nodes and setup coarse solver */ 2809 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2810 2811 /* free */ 2812 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2813 PetscFunctionReturn(0); 2814 } 2815 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "PCBDDCResetCustomization" 2818 PetscErrorCode PCBDDCResetCustomization(PC pc) 2819 { 2820 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2821 PetscErrorCode ierr; 2822 2823 PetscFunctionBegin; 2824 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2825 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2826 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2827 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2828 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2829 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2830 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2831 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2832 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2833 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2834 PetscFunctionReturn(0); 2835 } 2836 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "PCBDDCResetTopography" 2839 PetscErrorCode PCBDDCResetTopography(PC pc) 2840 { 2841 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2842 PetscInt i; 2843 PetscErrorCode ierr; 2844 2845 PetscFunctionBegin; 2846 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2847 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2848 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2849 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2850 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2851 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2852 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2853 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2854 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2855 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2856 for (i=0;i<pcbddc->n_local_subs;i++) { 2857 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2858 } 2859 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2860 if (pcbddc->sub_schurs) { 2861 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2862 } 2863 pcbddc->graphanalyzed = PETSC_FALSE; 2864 pcbddc->recompute_topography = PETSC_TRUE; 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "PCBDDCResetSolvers" 2870 PetscErrorCode PCBDDCResetSolvers(PC pc) 2871 { 2872 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2873 PetscErrorCode ierr; 2874 2875 PetscFunctionBegin; 2876 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2877 if (pcbddc->coarse_phi_B) { 2878 PetscScalar *array; 2879 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2880 ierr = PetscFree(array);CHKERRQ(ierr); 2881 } 2882 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2883 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2884 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2885 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2886 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2887 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2888 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2889 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2890 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2891 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2892 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2893 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2894 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2895 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2896 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2897 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2898 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2899 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2900 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2901 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2902 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2903 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2904 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2905 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2906 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2907 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2908 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2909 if (pcbddc->benign_zerodiag_subs) { 2910 PetscInt i; 2911 for (i=0;i<pcbddc->benign_n;i++) { 2912 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2913 } 2914 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2915 } 2916 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2917 PetscFunctionReturn(0); 2918 } 2919 2920 #undef __FUNCT__ 2921 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2922 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2923 { 2924 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2925 PC_IS *pcis = (PC_IS*)pc->data; 2926 VecType impVecType; 2927 PetscInt n_constraints,n_R,old_size; 2928 PetscErrorCode ierr; 2929 2930 PetscFunctionBegin; 2931 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2932 /* get sizes */ 2933 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2934 n_R = pcis->n - pcbddc->n_vertices; 2935 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2936 /* local work vectors (try to avoid unneeded work)*/ 2937 /* R nodes */ 2938 old_size = -1; 2939 if (pcbddc->vec1_R) { 2940 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2941 } 2942 if (n_R != old_size) { 2943 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2944 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2945 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2946 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2947 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2948 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2949 } 2950 /* local primal dofs */ 2951 old_size = -1; 2952 if (pcbddc->vec1_P) { 2953 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2954 } 2955 if (pcbddc->local_primal_size != old_size) { 2956 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2957 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2958 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2959 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2960 } 2961 /* local explicit constraints */ 2962 old_size = -1; 2963 if (pcbddc->vec1_C) { 2964 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2965 } 2966 if (n_constraints && n_constraints != old_size) { 2967 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2968 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2969 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2970 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2971 } 2972 PetscFunctionReturn(0); 2973 } 2974 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "PCBDDCSetUpCorrection" 2977 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2978 { 2979 PetscErrorCode ierr; 2980 /* pointers to pcis and pcbddc */ 2981 PC_IS* pcis = (PC_IS*)pc->data; 2982 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2983 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2984 /* submatrices of local problem */ 2985 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2986 /* submatrices of local coarse problem */ 2987 Mat S_VV,S_CV,S_VC,S_CC; 2988 /* working matrices */ 2989 Mat C_CR; 2990 /* additional working stuff */ 2991 PC pc_R; 2992 Mat F; 2993 Vec dummy_vec; 2994 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2995 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2996 PetscScalar *work; 2997 PetscInt *idx_V_B; 2998 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2999 PetscInt i,n_R,n_D,n_B; 3000 3001 /* some shortcuts to scalars */ 3002 PetscScalar one=1.0,m_one=-1.0; 3003 3004 PetscFunctionBegin; 3005 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3006 3007 /* Set Non-overlapping dimensions */ 3008 n_vertices = pcbddc->n_vertices; 3009 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3010 n_B = pcis->n_B; 3011 n_D = pcis->n - n_B; 3012 n_R = pcis->n - n_vertices; 3013 3014 /* vertices in boundary numbering */ 3015 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3016 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3017 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3018 3019 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3020 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3021 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3022 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3023 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3024 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3025 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3026 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3027 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3028 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3029 3030 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3031 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3032 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3033 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3034 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3035 lda_rhs = n_R; 3036 need_benign_correction = PETSC_FALSE; 3037 if (isLU || isILU || isCHOL) { 3038 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3039 } else if (sub_schurs && sub_schurs->reuse_solver) { 3040 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3041 MatFactorType type; 3042 3043 F = reuse_solver->F; 3044 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3045 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3046 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3047 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3048 } else { 3049 F = NULL; 3050 } 3051 3052 /* allocate workspace */ 3053 n = 0; 3054 if (n_constraints) { 3055 n += lda_rhs*n_constraints; 3056 } 3057 if (n_vertices) { 3058 n = PetscMax(2*lda_rhs*n_vertices,n); 3059 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3060 } 3061 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3062 3063 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3064 dummy_vec = NULL; 3065 if (need_benign_correction && lda_rhs != n_R && F) { 3066 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3067 } 3068 3069 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3070 if (n_constraints) { 3071 Mat M1,M2,M3,C_B; 3072 IS is_aux; 3073 PetscScalar *array,*array2; 3074 3075 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3076 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3077 3078 /* Extract constraints on R nodes: C_{CR} */ 3079 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3080 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3081 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3082 3083 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3084 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3085 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3086 for (i=0;i<n_constraints;i++) { 3087 const PetscScalar *row_cmat_values; 3088 const PetscInt *row_cmat_indices; 3089 PetscInt size_of_constraint,j; 3090 3091 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3092 for (j=0;j<size_of_constraint;j++) { 3093 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3094 } 3095 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3096 } 3097 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3098 if (F) { 3099 Mat B; 3100 3101 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3102 if (need_benign_correction) { 3103 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3104 3105 /* rhs is already zero on interior dofs, no need to change the rhs */ 3106 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3107 } 3108 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3109 if (need_benign_correction) { 3110 PetscScalar *marr; 3111 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3112 3113 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3114 if (lda_rhs != n_R) { 3115 for (i=0;i<n_constraints;i++) { 3116 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3117 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3118 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3119 } 3120 } else { 3121 for (i=0;i<n_constraints;i++) { 3122 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3123 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3124 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3125 } 3126 } 3127 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3128 } 3129 ierr = MatDestroy(&B);CHKERRQ(ierr); 3130 } else { 3131 PetscScalar *marr; 3132 3133 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3134 for (i=0;i<n_constraints;i++) { 3135 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3136 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3137 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3138 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3139 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3140 } 3141 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3142 } 3143 if (!pcbddc->switch_static) { 3144 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3145 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3146 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3147 for (i=0;i<n_constraints;i++) { 3148 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3149 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3150 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3151 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3152 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3153 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3154 } 3155 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3156 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3157 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3158 } else { 3159 if (lda_rhs != n_R) { 3160 IS dummy; 3161 3162 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3163 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3164 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3165 } else { 3166 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3167 pcbddc->local_auxmat2 = local_auxmat2_R; 3168 } 3169 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3170 } 3171 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3172 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3173 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3174 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3175 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3176 if (isCHOL) { 3177 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3178 } else { 3179 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3180 } 3181 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3182 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3183 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3184 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3185 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3186 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3187 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3188 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3189 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3190 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3191 } 3192 3193 /* Get submatrices from subdomain matrix */ 3194 if (n_vertices) { 3195 IS is_aux; 3196 3197 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3198 IS tis; 3199 3200 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3201 ierr = ISSort(tis);CHKERRQ(ierr); 3202 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3203 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3204 } else { 3205 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3206 } 3207 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3208 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3209 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3210 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3211 } 3212 3213 /* Matrix of coarse basis functions (local) */ 3214 if (pcbddc->coarse_phi_B) { 3215 PetscInt on_B,on_primal,on_D=n_D; 3216 if (pcbddc->coarse_phi_D) { 3217 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3218 } 3219 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3220 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3221 PetscScalar *marray; 3222 3223 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3224 ierr = PetscFree(marray);CHKERRQ(ierr); 3225 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3226 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3227 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3228 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3229 } 3230 } 3231 3232 if (!pcbddc->coarse_phi_B) { 3233 PetscScalar *marray; 3234 3235 n = n_B*pcbddc->local_primal_size; 3236 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3237 n += n_D*pcbddc->local_primal_size; 3238 } 3239 if (!pcbddc->symmetric_primal) { 3240 n *= 2; 3241 } 3242 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3243 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3244 n = n_B*pcbddc->local_primal_size; 3245 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3246 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3247 n += n_D*pcbddc->local_primal_size; 3248 } 3249 if (!pcbddc->symmetric_primal) { 3250 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3251 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3252 n = n_B*pcbddc->local_primal_size; 3253 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3254 } 3255 } else { 3256 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3257 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3258 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3259 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3260 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3261 } 3262 } 3263 } 3264 3265 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3266 p0_lidx_I = NULL; 3267 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3268 const PetscInt *idxs; 3269 3270 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3271 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3272 for (i=0;i<pcbddc->benign_n;i++) { 3273 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3274 } 3275 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3276 } 3277 3278 /* vertices */ 3279 if (n_vertices) { 3280 3281 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3282 3283 if (n_R) { 3284 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3285 PetscBLASInt B_N,B_one = 1; 3286 PetscScalar *x,*y; 3287 PetscBool isseqaij; 3288 3289 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3290 if (need_benign_correction) { 3291 ISLocalToGlobalMapping RtoN; 3292 IS is_p0; 3293 PetscInt *idxs_p0,n; 3294 3295 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3296 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3297 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3298 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3299 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3300 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3301 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3302 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3303 } 3304 3305 if (lda_rhs == n_R) { 3306 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3307 } else { 3308 PetscScalar *av,*array; 3309 const PetscInt *xadj,*adjncy; 3310 PetscInt n; 3311 PetscBool flg_row; 3312 3313 array = work+lda_rhs*n_vertices; 3314 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3315 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3316 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3317 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3318 for (i=0;i<n;i++) { 3319 PetscInt j; 3320 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3321 } 3322 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3323 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3324 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3325 } 3326 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3327 if (need_benign_correction) { 3328 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3329 PetscScalar *marr; 3330 3331 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3332 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3333 3334 | 0 0 0 | (V) 3335 L = | 0 0 -1 | (P-p0) 3336 | 0 0 -1 | (p0) 3337 3338 */ 3339 for (i=0;i<reuse_solver->benign_n;i++) { 3340 const PetscScalar *vals; 3341 const PetscInt *idxs,*idxs_zero; 3342 PetscInt n,j,nz; 3343 3344 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3345 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3346 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3347 for (j=0;j<n;j++) { 3348 PetscScalar val = vals[j]; 3349 PetscInt k,col = idxs[j]; 3350 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3351 } 3352 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3353 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3354 } 3355 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3356 } 3357 if (F) { 3358 /* need to correct the rhs */ 3359 if (need_benign_correction) { 3360 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3361 PetscScalar *marr; 3362 3363 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3364 if (lda_rhs != n_R) { 3365 for (i=0;i<n_vertices;i++) { 3366 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3367 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3368 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3369 } 3370 } else { 3371 for (i=0;i<n_vertices;i++) { 3372 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3373 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3374 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3375 } 3376 } 3377 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3378 } 3379 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3380 /* need to correct the solution */ 3381 if (need_benign_correction) { 3382 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3383 PetscScalar *marr; 3384 3385 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3386 if (lda_rhs != n_R) { 3387 for (i=0;i<n_vertices;i++) { 3388 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3389 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3390 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3391 } 3392 } else { 3393 for (i=0;i<n_vertices;i++) { 3394 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3395 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3396 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3397 } 3398 } 3399 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3400 } 3401 } else { 3402 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3403 for (i=0;i<n_vertices;i++) { 3404 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3405 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3406 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3407 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3408 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3409 } 3410 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3411 } 3412 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3413 /* S_VV and S_CV */ 3414 if (n_constraints) { 3415 Mat B; 3416 3417 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3418 for (i=0;i<n_vertices;i++) { 3419 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3420 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3421 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3422 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3423 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3424 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3425 } 3426 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3427 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3428 ierr = MatDestroy(&B);CHKERRQ(ierr); 3429 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3430 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3431 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3432 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3433 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3434 ierr = MatDestroy(&B);CHKERRQ(ierr); 3435 } 3436 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3437 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3438 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3439 } 3440 if (lda_rhs != n_R) { 3441 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3442 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3443 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3444 } 3445 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3446 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3447 if (need_benign_correction) { 3448 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3449 PetscScalar *marr,*sums; 3450 3451 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3452 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3453 for (i=0;i<reuse_solver->benign_n;i++) { 3454 const PetscScalar *vals; 3455 const PetscInt *idxs,*idxs_zero; 3456 PetscInt n,j,nz; 3457 3458 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3459 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3460 for (j=0;j<n_vertices;j++) { 3461 PetscInt k; 3462 sums[j] = 0.; 3463 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3464 } 3465 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3466 for (j=0;j<n;j++) { 3467 PetscScalar val = vals[j]; 3468 PetscInt k; 3469 for (k=0;k<n_vertices;k++) { 3470 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3471 } 3472 } 3473 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3474 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3475 } 3476 ierr = PetscFree(sums);CHKERRQ(ierr); 3477 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3478 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3479 } 3480 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3481 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3482 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3483 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3484 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3485 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3486 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3487 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3488 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3489 } else { 3490 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3491 } 3492 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3493 3494 /* coarse basis functions */ 3495 for (i=0;i<n_vertices;i++) { 3496 PetscScalar *y; 3497 3498 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3499 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3500 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3501 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3502 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3503 y[n_B*i+idx_V_B[i]] = 1.0; 3504 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3505 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3506 3507 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3508 PetscInt j; 3509 3510 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3511 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3512 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3513 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3514 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3515 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3516 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3517 } 3518 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3519 } 3520 /* if n_R == 0 the object is not destroyed */ 3521 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3522 } 3523 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3524 3525 if (n_constraints) { 3526 Mat B; 3527 3528 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3529 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3530 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3531 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3532 if (n_vertices) { 3533 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3534 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3535 } else { 3536 Mat S_VCt; 3537 3538 if (lda_rhs != n_R) { 3539 ierr = MatDestroy(&B);CHKERRQ(ierr); 3540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3541 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3542 } 3543 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3544 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3545 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3546 } 3547 } 3548 ierr = MatDestroy(&B);CHKERRQ(ierr); 3549 /* coarse basis functions */ 3550 for (i=0;i<n_constraints;i++) { 3551 PetscScalar *y; 3552 3553 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3554 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3555 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3556 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3557 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3558 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3559 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3560 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3561 PetscInt j; 3562 3563 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3564 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3565 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3566 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3567 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3568 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3569 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3570 } 3571 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3572 } 3573 } 3574 if (n_constraints) { 3575 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3576 } 3577 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3578 3579 /* coarse matrix entries relative to B_0 */ 3580 if (pcbddc->benign_n) { 3581 Mat B0_B,B0_BPHI; 3582 IS is_dummy; 3583 PetscScalar *data; 3584 PetscInt j; 3585 3586 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3587 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3588 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3589 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3590 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3591 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3592 for (j=0;j<pcbddc->benign_n;j++) { 3593 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3594 for (i=0;i<pcbddc->local_primal_size;i++) { 3595 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3596 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3597 } 3598 } 3599 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3600 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3601 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3602 } 3603 3604 /* compute other basis functions for non-symmetric problems */ 3605 if (!pcbddc->symmetric_primal) { 3606 Mat B_V=NULL,B_C=NULL; 3607 PetscScalar *marray; 3608 3609 if (n_constraints) { 3610 Mat S_CCT,C_CRT; 3611 3612 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3613 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3614 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3615 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3616 if (n_vertices) { 3617 Mat S_VCT; 3618 3619 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3620 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3621 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3622 } 3623 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3624 } else { 3625 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3626 } 3627 if (n_vertices && n_R) { 3628 PetscScalar *av,*marray; 3629 const PetscInt *xadj,*adjncy; 3630 PetscInt n; 3631 PetscBool flg_row; 3632 3633 /* B_V = B_V - A_VR^T */ 3634 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3635 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3636 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3637 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3638 for (i=0;i<n;i++) { 3639 PetscInt j; 3640 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3641 } 3642 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3643 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3644 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3645 } 3646 3647 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3648 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3649 for (i=0;i<n_vertices;i++) { 3650 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3651 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3652 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3653 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3654 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3655 } 3656 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3657 if (B_C) { 3658 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3659 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3660 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3661 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3662 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3664 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3665 } 3666 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3667 } 3668 /* coarse basis functions */ 3669 for (i=0;i<pcbddc->local_primal_size;i++) { 3670 PetscScalar *y; 3671 3672 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3673 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3674 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3675 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3676 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3677 if (i<n_vertices) { 3678 y[n_B*i+idx_V_B[i]] = 1.0; 3679 } 3680 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3681 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3682 3683 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3684 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3685 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3686 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3687 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3688 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3689 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3690 } 3691 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3692 } 3693 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3694 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3695 } 3696 /* free memory */ 3697 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3698 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3699 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3700 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3701 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3702 ierr = PetscFree(work);CHKERRQ(ierr); 3703 if (n_vertices) { 3704 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3705 } 3706 if (n_constraints) { 3707 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3708 } 3709 /* Checking coarse_sub_mat and coarse basis functios */ 3710 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3711 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3712 if (pcbddc->dbg_flag) { 3713 Mat coarse_sub_mat; 3714 Mat AUXMAT,TM1,TM2,TM3,TM4; 3715 Mat coarse_phi_D,coarse_phi_B; 3716 Mat coarse_psi_D,coarse_psi_B; 3717 Mat A_II,A_BB,A_IB,A_BI; 3718 Mat C_B,CPHI; 3719 IS is_dummy; 3720 Vec mones; 3721 MatType checkmattype=MATSEQAIJ; 3722 PetscReal real_value; 3723 3724 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3725 Mat A; 3726 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3727 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3728 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3729 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3730 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3731 ierr = MatDestroy(&A);CHKERRQ(ierr); 3732 } else { 3733 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3734 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3735 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3736 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3737 } 3738 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3739 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3740 if (!pcbddc->symmetric_primal) { 3741 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3742 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3743 } 3744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3745 3746 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3747 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3748 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3749 if (!pcbddc->symmetric_primal) { 3750 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3751 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3752 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3753 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3754 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3755 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3756 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3757 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3758 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3759 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3760 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3761 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3762 } else { 3763 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3764 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3765 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3766 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3767 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3768 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3769 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3770 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3771 } 3772 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3773 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3774 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3775 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3776 if (pcbddc->benign_n) { 3777 Mat B0_B,B0_BPHI; 3778 PetscScalar *data,*data2; 3779 PetscInt j; 3780 3781 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3782 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3783 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3784 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3785 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3786 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3787 for (j=0;j<pcbddc->benign_n;j++) { 3788 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3789 for (i=0;i<pcbddc->local_primal_size;i++) { 3790 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3791 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3792 } 3793 } 3794 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3795 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3796 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3797 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3798 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3799 } 3800 #if 0 3801 { 3802 PetscViewer viewer; 3803 char filename[256]; 3804 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3805 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3806 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3807 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3808 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3809 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3810 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3811 if (save_change) { 3812 Mat phi_B; 3813 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3814 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3815 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3816 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3817 } else { 3818 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3819 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3820 } 3821 if (pcbddc->coarse_phi_D) { 3822 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3823 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3824 } 3825 if (pcbddc->coarse_psi_B) { 3826 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3827 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3828 } 3829 if (pcbddc->coarse_psi_D) { 3830 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3831 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3832 } 3833 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3834 } 3835 #endif 3836 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3837 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3838 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3839 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3840 3841 /* check constraints */ 3842 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3843 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3844 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3845 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3846 } else { 3847 PetscScalar *data; 3848 Mat tmat; 3849 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3850 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3851 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3852 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3853 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3854 } 3855 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3856 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3857 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3858 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3859 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3860 if (!pcbddc->symmetric_primal) { 3861 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3862 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3863 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3864 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3865 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3866 } 3867 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3868 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3869 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3870 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3871 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3872 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3873 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3874 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3875 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3876 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3877 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3878 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3879 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3880 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3881 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3882 if (!pcbddc->symmetric_primal) { 3883 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3884 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3885 } 3886 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3887 } 3888 /* get back data */ 3889 *coarse_submat_vals_n = coarse_submat_vals; 3890 PetscFunctionReturn(0); 3891 } 3892 3893 #undef __FUNCT__ 3894 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3895 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3896 { 3897 Mat *work_mat; 3898 IS isrow_s,iscol_s; 3899 PetscBool rsorted,csorted; 3900 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3901 PetscErrorCode ierr; 3902 3903 PetscFunctionBegin; 3904 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3905 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3906 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3907 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3908 3909 if (!rsorted) { 3910 const PetscInt *idxs; 3911 PetscInt *idxs_sorted,i; 3912 3913 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3914 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3915 for (i=0;i<rsize;i++) { 3916 idxs_perm_r[i] = i; 3917 } 3918 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3919 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3920 for (i=0;i<rsize;i++) { 3921 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3922 } 3923 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3924 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3925 } else { 3926 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3927 isrow_s = isrow; 3928 } 3929 3930 if (!csorted) { 3931 if (isrow == iscol) { 3932 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3933 iscol_s = isrow_s; 3934 } else { 3935 const PetscInt *idxs; 3936 PetscInt *idxs_sorted,i; 3937 3938 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3939 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3940 for (i=0;i<csize;i++) { 3941 idxs_perm_c[i] = i; 3942 } 3943 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3944 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3945 for (i=0;i<csize;i++) { 3946 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3947 } 3948 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3949 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3950 } 3951 } else { 3952 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3953 iscol_s = iscol; 3954 } 3955 3956 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3957 3958 if (!rsorted || !csorted) { 3959 Mat new_mat; 3960 IS is_perm_r,is_perm_c; 3961 3962 if (!rsorted) { 3963 PetscInt *idxs_r,i; 3964 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3965 for (i=0;i<rsize;i++) { 3966 idxs_r[idxs_perm_r[i]] = i; 3967 } 3968 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3969 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3970 } else { 3971 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3972 } 3973 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3974 3975 if (!csorted) { 3976 if (isrow_s == iscol_s) { 3977 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3978 is_perm_c = is_perm_r; 3979 } else { 3980 PetscInt *idxs_c,i; 3981 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 3982 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3983 for (i=0;i<csize;i++) { 3984 idxs_c[idxs_perm_c[i]] = i; 3985 } 3986 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3987 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3988 } 3989 } else { 3990 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3991 } 3992 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3993 3994 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3995 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3996 work_mat[0] = new_mat; 3997 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3998 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3999 } 4000 4001 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4002 *B = work_mat[0]; 4003 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4004 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4005 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4006 PetscFunctionReturn(0); 4007 } 4008 4009 #undef __FUNCT__ 4010 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4011 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4012 { 4013 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4014 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4015 Mat new_mat; 4016 IS is_local,is_global; 4017 PetscInt local_size; 4018 PetscBool isseqaij; 4019 PetscErrorCode ierr; 4020 4021 PetscFunctionBegin; 4022 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4023 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4024 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4025 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4026 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4027 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4028 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4029 4030 /* check */ 4031 if (pcbddc->dbg_flag) { 4032 Vec x,x_change; 4033 PetscReal error; 4034 4035 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4036 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4037 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4038 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4039 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4040 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4041 if (!pcbddc->change_interior) { 4042 const PetscScalar *x,*y,*v; 4043 PetscReal lerror = 0.; 4044 PetscInt i; 4045 4046 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4047 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4048 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4049 for (i=0;i<local_size;i++) 4050 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4051 lerror = PetscAbsScalar(x[i]-y[i]); 4052 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4053 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4054 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4055 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on I: %1.6e\n",error);CHKERRQ(ierr); 4057 } 4058 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4059 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4060 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4061 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 4064 ierr = VecDestroy(&x);CHKERRQ(ierr); 4065 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4066 } 4067 4068 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4069 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4070 if (isseqaij) { 4071 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4072 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4073 } else { 4074 Mat work_mat; 4075 4076 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4077 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4078 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4079 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4080 } 4081 if (matis->A->symmetric_set) { 4082 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4083 #if !defined(PETSC_USE_COMPLEX) 4084 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4085 #endif 4086 } 4087 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4088 PetscFunctionReturn(0); 4089 } 4090 4091 #undef __FUNCT__ 4092 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4093 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4094 { 4095 PC_IS* pcis = (PC_IS*)(pc->data); 4096 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4097 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4098 PetscInt *idx_R_local=NULL; 4099 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4100 PetscInt vbs,bs; 4101 PetscBT bitmask=NULL; 4102 PetscErrorCode ierr; 4103 4104 PetscFunctionBegin; 4105 /* 4106 No need to setup local scatters if 4107 - primal space is unchanged 4108 AND 4109 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4110 AND 4111 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4112 */ 4113 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4114 PetscFunctionReturn(0); 4115 } 4116 /* destroy old objects */ 4117 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4118 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4119 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4120 /* Set Non-overlapping dimensions */ 4121 n_B = pcis->n_B; 4122 n_D = pcis->n - n_B; 4123 n_vertices = pcbddc->n_vertices; 4124 4125 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4126 4127 /* create auxiliary bitmask and allocate workspace */ 4128 if (!sub_schurs || !sub_schurs->reuse_solver) { 4129 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4130 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4131 for (i=0;i<n_vertices;i++) { 4132 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4133 } 4134 4135 for (i=0, n_R=0; i<pcis->n; i++) { 4136 if (!PetscBTLookup(bitmask,i)) { 4137 idx_R_local[n_R++] = i; 4138 } 4139 } 4140 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4141 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4142 4143 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4144 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4145 } 4146 4147 /* Block code */ 4148 vbs = 1; 4149 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4150 if (bs>1 && !(n_vertices%bs)) { 4151 PetscBool is_blocked = PETSC_TRUE; 4152 PetscInt *vary; 4153 if (!sub_schurs || !sub_schurs->reuse_solver) { 4154 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4155 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4156 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4157 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4158 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4159 for (i=0; i<pcis->n/bs; i++) { 4160 if (vary[i]!=0 && vary[i]!=bs) { 4161 is_blocked = PETSC_FALSE; 4162 break; 4163 } 4164 } 4165 ierr = PetscFree(vary);CHKERRQ(ierr); 4166 } else { 4167 /* Verify directly the R set */ 4168 for (i=0; i<n_R/bs; i++) { 4169 PetscInt j,node=idx_R_local[bs*i]; 4170 for (j=1; j<bs; j++) { 4171 if (node != idx_R_local[bs*i+j]-j) { 4172 is_blocked = PETSC_FALSE; 4173 break; 4174 } 4175 } 4176 } 4177 } 4178 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4179 vbs = bs; 4180 for (i=0;i<n_R/vbs;i++) { 4181 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4182 } 4183 } 4184 } 4185 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4186 if (sub_schurs && sub_schurs->reuse_solver) { 4187 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4188 4189 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4190 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4191 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4192 reuse_solver->is_R = pcbddc->is_R_local; 4193 } else { 4194 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4195 } 4196 4197 /* print some info if requested */ 4198 if (pcbddc->dbg_flag) { 4199 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4201 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4202 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4203 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4204 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4205 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4206 } 4207 4208 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4209 if (!sub_schurs || !sub_schurs->reuse_solver) { 4210 IS is_aux1,is_aux2; 4211 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4212 4213 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4214 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4215 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4216 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4217 for (i=0; i<n_D; i++) { 4218 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4219 } 4220 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4221 for (i=0, j=0; i<n_R; i++) { 4222 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4223 aux_array1[j++] = i; 4224 } 4225 } 4226 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4227 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4228 for (i=0, j=0; i<n_B; i++) { 4229 if (!PetscBTLookup(bitmask,is_indices[i])) { 4230 aux_array2[j++] = i; 4231 } 4232 } 4233 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4234 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4235 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4236 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4237 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4238 4239 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4240 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4241 for (i=0, j=0; i<n_R; i++) { 4242 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4243 aux_array1[j++] = i; 4244 } 4245 } 4246 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4247 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4248 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4249 } 4250 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4251 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4252 } else { 4253 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4254 IS tis; 4255 PetscInt schur_size; 4256 4257 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4258 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4259 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4260 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4261 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4262 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4263 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4264 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4265 } 4266 } 4267 PetscFunctionReturn(0); 4268 } 4269 4270 4271 #undef __FUNCT__ 4272 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4273 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4274 { 4275 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4276 PC_IS *pcis = (PC_IS*)pc->data; 4277 PC pc_temp; 4278 Mat A_RR; 4279 MatReuse reuse; 4280 PetscScalar m_one = -1.0; 4281 PetscReal value; 4282 PetscInt n_D,n_R; 4283 PetscBool check_corr[2],issbaij; 4284 PetscErrorCode ierr; 4285 /* prefixes stuff */ 4286 char dir_prefix[256],neu_prefix[256],str_level[16]; 4287 size_t len; 4288 4289 PetscFunctionBegin; 4290 4291 /* compute prefixes */ 4292 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4293 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4294 if (!pcbddc->current_level) { 4295 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4296 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4297 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4298 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4299 } else { 4300 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4301 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4302 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4303 len -= 15; /* remove "pc_bddc_coarse_" */ 4304 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4305 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4306 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4307 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4308 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4309 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4310 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4311 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4312 } 4313 4314 /* DIRICHLET PROBLEM */ 4315 if (dirichlet) { 4316 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4317 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4318 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4319 if (pcbddc->dbg_flag) { 4320 Mat A_IIn; 4321 4322 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4323 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4324 pcis->A_II = A_IIn; 4325 } 4326 } 4327 if (pcbddc->local_mat->symmetric_set) { 4328 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4329 } 4330 /* Matrix for Dirichlet problem is pcis->A_II */ 4331 n_D = pcis->n - pcis->n_B; 4332 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4333 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4334 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4335 /* default */ 4336 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4337 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4338 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4339 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4340 if (issbaij) { 4341 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4342 } else { 4343 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4344 } 4345 /* Allow user's customization */ 4346 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4347 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4348 } 4349 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4350 if (sub_schurs && sub_schurs->reuse_solver) { 4351 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4352 4353 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4354 } 4355 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4356 if (!n_D) { 4357 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4358 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4359 } 4360 /* Set Up KSP for Dirichlet problem of BDDC */ 4361 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4362 /* set ksp_D into pcis data */ 4363 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4364 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4365 pcis->ksp_D = pcbddc->ksp_D; 4366 } 4367 4368 /* NEUMANN PROBLEM */ 4369 A_RR = 0; 4370 if (neumann) { 4371 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4372 PetscInt ibs,mbs; 4373 PetscBool issbaij; 4374 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4375 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4376 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4377 if (pcbddc->ksp_R) { /* already created ksp */ 4378 PetscInt nn_R; 4379 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4380 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4381 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4382 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4383 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4384 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4385 reuse = MAT_INITIAL_MATRIX; 4386 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4387 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4388 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4389 reuse = MAT_INITIAL_MATRIX; 4390 } else { /* safe to reuse the matrix */ 4391 reuse = MAT_REUSE_MATRIX; 4392 } 4393 } 4394 /* last check */ 4395 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4396 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4397 reuse = MAT_INITIAL_MATRIX; 4398 } 4399 } else { /* first time, so we need to create the matrix */ 4400 reuse = MAT_INITIAL_MATRIX; 4401 } 4402 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4403 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4404 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4405 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4406 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4407 if (matis->A == pcbddc->local_mat) { 4408 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4409 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4410 } else { 4411 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4412 } 4413 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4414 if (matis->A == pcbddc->local_mat) { 4415 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4416 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4417 } else { 4418 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4419 } 4420 } 4421 /* extract A_RR */ 4422 if (sub_schurs && sub_schurs->reuse_solver) { 4423 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4424 4425 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4426 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4427 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4428 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4429 } else { 4430 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4431 } 4432 } else { 4433 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4434 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4435 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4436 } 4437 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4438 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4439 } 4440 if (pcbddc->local_mat->symmetric_set) { 4441 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4442 } 4443 if (!pcbddc->ksp_R) { /* create object if not present */ 4444 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4445 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4446 /* default */ 4447 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4448 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4449 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4450 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4451 if (issbaij) { 4452 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4453 } else { 4454 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4455 } 4456 /* Allow user's customization */ 4457 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4458 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4459 } 4460 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4461 if (!n_R) { 4462 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4463 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4464 } 4465 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4466 /* Reuse solver if it is present */ 4467 if (sub_schurs && sub_schurs->reuse_solver) { 4468 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4469 4470 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4471 } 4472 /* Set Up KSP for Neumann problem of BDDC */ 4473 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4474 } 4475 4476 if (pcbddc->dbg_flag) { 4477 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4478 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4479 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4480 } 4481 4482 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4483 check_corr[0] = check_corr[1] = PETSC_FALSE; 4484 if (pcbddc->NullSpace_corr[0]) { 4485 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4486 } 4487 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4488 check_corr[0] = PETSC_TRUE; 4489 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4490 } 4491 if (neumann && pcbddc->NullSpace_corr[2]) { 4492 check_corr[1] = PETSC_TRUE; 4493 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4494 } 4495 4496 /* check Dirichlet and Neumann solvers */ 4497 if (pcbddc->dbg_flag) { 4498 if (dirichlet) { /* Dirichlet */ 4499 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4500 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4501 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4502 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4503 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4505 if (check_corr[0]) { 4506 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4507 } 4508 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4509 } 4510 if (neumann) { /* Neumann */ 4511 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4512 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4513 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4514 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4515 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4516 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4517 if (check_corr[1]) { 4518 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4519 } 4520 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4521 } 4522 } 4523 /* free Neumann problem's matrix */ 4524 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4525 PetscFunctionReturn(0); 4526 } 4527 4528 #undef __FUNCT__ 4529 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4530 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4531 { 4532 PetscErrorCode ierr; 4533 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4534 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4535 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4536 4537 PetscFunctionBegin; 4538 if (!reuse_solver) { 4539 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4540 } 4541 if (!pcbddc->switch_static) { 4542 if (applytranspose && pcbddc->local_auxmat1) { 4543 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4544 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4545 } 4546 if (!reuse_solver) { 4547 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4548 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4549 } else { 4550 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4551 4552 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4553 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4554 } 4555 } else { 4556 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4557 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4558 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4559 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4560 if (applytranspose && pcbddc->local_auxmat1) { 4561 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4562 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4563 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4564 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4565 } 4566 } 4567 if (!reuse_solver || pcbddc->switch_static) { 4568 if (applytranspose) { 4569 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4570 } else { 4571 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4572 } 4573 } else { 4574 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4575 4576 if (applytranspose) { 4577 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4578 } else { 4579 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4580 } 4581 } 4582 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4583 if (!pcbddc->switch_static) { 4584 if (!reuse_solver) { 4585 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4586 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4587 } else { 4588 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4589 4590 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4591 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4592 } 4593 if (!applytranspose && pcbddc->local_auxmat1) { 4594 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4595 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4596 } 4597 } else { 4598 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4599 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4600 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4601 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4602 if (!applytranspose && pcbddc->local_auxmat1) { 4603 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4604 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4605 } 4606 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4607 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4608 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4609 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4610 } 4611 PetscFunctionReturn(0); 4612 } 4613 4614 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4615 #undef __FUNCT__ 4616 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4617 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4618 { 4619 PetscErrorCode ierr; 4620 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4621 PC_IS* pcis = (PC_IS*) (pc->data); 4622 const PetscScalar zero = 0.0; 4623 4624 PetscFunctionBegin; 4625 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4626 if (!pcbddc->benign_apply_coarse_only) { 4627 if (applytranspose) { 4628 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4629 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4630 } else { 4631 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4632 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4633 } 4634 } else { 4635 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4636 } 4637 4638 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4639 if (pcbddc->benign_n) { 4640 PetscScalar *array; 4641 PetscInt j; 4642 4643 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4644 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4645 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4646 } 4647 4648 /* start communications from local primal nodes to rhs of coarse solver */ 4649 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4650 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4651 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4652 4653 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4654 if (pcbddc->coarse_ksp) { 4655 Mat coarse_mat; 4656 Vec rhs,sol; 4657 MatNullSpace nullsp; 4658 PetscBool isbddc = PETSC_FALSE; 4659 4660 if (pcbddc->benign_have_null) { 4661 PC coarse_pc; 4662 4663 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4664 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4665 /* we need to propagate to coarser levels the need for a possible benign correction */ 4666 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4667 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4668 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4669 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4670 } 4671 } 4672 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4673 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4674 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4675 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4676 if (nullsp) { 4677 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4678 } 4679 if (applytranspose) { 4680 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4681 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4682 } else { 4683 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4684 PC coarse_pc; 4685 4686 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4687 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4688 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4689 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4690 } else { 4691 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4692 } 4693 } 4694 /* we don't need the benign correction at coarser levels anymore */ 4695 if (pcbddc->benign_have_null && isbddc) { 4696 PC coarse_pc; 4697 PC_BDDC* coarsepcbddc; 4698 4699 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4700 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4701 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4702 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4703 } 4704 if (nullsp) { 4705 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4706 } 4707 } 4708 4709 /* Local solution on R nodes */ 4710 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4711 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4712 } 4713 /* communications from coarse sol to local primal nodes */ 4714 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4715 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4716 4717 /* Sum contributions from the two levels */ 4718 if (!pcbddc->benign_apply_coarse_only) { 4719 if (applytranspose) { 4720 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4721 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4722 } else { 4723 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4724 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4725 } 4726 /* store p0 */ 4727 if (pcbddc->benign_n) { 4728 PetscScalar *array; 4729 PetscInt j; 4730 4731 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4732 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4733 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4734 } 4735 } else { /* expand the coarse solution */ 4736 if (applytranspose) { 4737 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4738 } else { 4739 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4740 } 4741 } 4742 PetscFunctionReturn(0); 4743 } 4744 4745 #undef __FUNCT__ 4746 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4747 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4748 { 4749 PetscErrorCode ierr; 4750 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4751 PetscScalar *array; 4752 Vec from,to; 4753 4754 PetscFunctionBegin; 4755 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4756 from = pcbddc->coarse_vec; 4757 to = pcbddc->vec1_P; 4758 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4759 Vec tvec; 4760 4761 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4762 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4763 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4764 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4765 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4766 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4767 } 4768 } else { /* from local to global -> put data in coarse right hand side */ 4769 from = pcbddc->vec1_P; 4770 to = pcbddc->coarse_vec; 4771 } 4772 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4773 PetscFunctionReturn(0); 4774 } 4775 4776 #undef __FUNCT__ 4777 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4778 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4779 { 4780 PetscErrorCode ierr; 4781 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4782 PetscScalar *array; 4783 Vec from,to; 4784 4785 PetscFunctionBegin; 4786 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4787 from = pcbddc->coarse_vec; 4788 to = pcbddc->vec1_P; 4789 } else { /* from local to global -> put data in coarse right hand side */ 4790 from = pcbddc->vec1_P; 4791 to = pcbddc->coarse_vec; 4792 } 4793 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4794 if (smode == SCATTER_FORWARD) { 4795 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4796 Vec tvec; 4797 4798 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4799 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4800 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4801 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4802 } 4803 } else { 4804 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4805 ierr = VecResetArray(from);CHKERRQ(ierr); 4806 } 4807 } 4808 PetscFunctionReturn(0); 4809 } 4810 4811 /* uncomment for testing purposes */ 4812 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4813 #undef __FUNCT__ 4814 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4815 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4816 { 4817 PetscErrorCode ierr; 4818 PC_IS* pcis = (PC_IS*)(pc->data); 4819 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4820 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4821 /* one and zero */ 4822 PetscScalar one=1.0,zero=0.0; 4823 /* space to store constraints and their local indices */ 4824 PetscScalar *constraints_data; 4825 PetscInt *constraints_idxs,*constraints_idxs_B; 4826 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4827 PetscInt *constraints_n; 4828 /* iterators */ 4829 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4830 /* BLAS integers */ 4831 PetscBLASInt lwork,lierr; 4832 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4833 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4834 /* reuse */ 4835 PetscInt olocal_primal_size,olocal_primal_size_cc; 4836 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4837 /* change of basis */ 4838 PetscBool qr_needed; 4839 PetscBT change_basis,qr_needed_idx; 4840 /* auxiliary stuff */ 4841 PetscInt *nnz,*is_indices; 4842 PetscInt ncc; 4843 /* some quantities */ 4844 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4845 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4846 4847 PetscFunctionBegin; 4848 /* Destroy Mat objects computed previously */ 4849 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4850 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4851 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4852 /* save info on constraints from previous setup (if any) */ 4853 olocal_primal_size = pcbddc->local_primal_size; 4854 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4855 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4856 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4857 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4858 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4859 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4860 4861 if (!pcbddc->adaptive_selection) { 4862 IS ISForVertices,*ISForFaces,*ISForEdges; 4863 MatNullSpace nearnullsp; 4864 const Vec *nearnullvecs; 4865 Vec *localnearnullsp; 4866 PetscScalar *array; 4867 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4868 PetscBool nnsp_has_cnst; 4869 /* LAPACK working arrays for SVD or POD */ 4870 PetscBool skip_lapack,boolforchange; 4871 PetscScalar *work; 4872 PetscReal *singular_vals; 4873 #if defined(PETSC_USE_COMPLEX) 4874 PetscReal *rwork; 4875 #endif 4876 #if defined(PETSC_MISSING_LAPACK_GESVD) 4877 PetscScalar *temp_basis,*correlation_mat; 4878 #else 4879 PetscBLASInt dummy_int=1; 4880 PetscScalar dummy_scalar=1.; 4881 #endif 4882 4883 /* Get index sets for faces, edges and vertices from graph */ 4884 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4885 /* print some info */ 4886 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 4887 PetscInt nv; 4888 4889 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4890 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4891 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4892 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4893 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4894 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4895 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4897 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4898 } 4899 4900 /* free unneeded index sets */ 4901 if (!pcbddc->use_vertices) { 4902 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4903 } 4904 if (!pcbddc->use_edges) { 4905 for (i=0;i<n_ISForEdges;i++) { 4906 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4907 } 4908 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4909 n_ISForEdges = 0; 4910 } 4911 if (!pcbddc->use_faces) { 4912 for (i=0;i<n_ISForFaces;i++) { 4913 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4914 } 4915 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4916 n_ISForFaces = 0; 4917 } 4918 4919 /* check if near null space is attached to global mat */ 4920 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4921 if (nearnullsp) { 4922 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4923 /* remove any stored info */ 4924 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4925 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4926 /* store information for BDDC solver reuse */ 4927 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4928 pcbddc->onearnullspace = nearnullsp; 4929 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4930 for (i=0;i<nnsp_size;i++) { 4931 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4932 } 4933 } else { /* if near null space is not provided BDDC uses constants by default */ 4934 nnsp_size = 0; 4935 nnsp_has_cnst = PETSC_TRUE; 4936 } 4937 /* get max number of constraints on a single cc */ 4938 max_constraints = nnsp_size; 4939 if (nnsp_has_cnst) max_constraints++; 4940 4941 /* 4942 Evaluate maximum storage size needed by the procedure 4943 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4944 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4945 There can be multiple constraints per connected component 4946 */ 4947 n_vertices = 0; 4948 if (ISForVertices) { 4949 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4950 } 4951 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4952 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4953 4954 total_counts = n_ISForFaces+n_ISForEdges; 4955 total_counts *= max_constraints; 4956 total_counts += n_vertices; 4957 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4958 4959 total_counts = 0; 4960 max_size_of_constraint = 0; 4961 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4962 IS used_is; 4963 if (i<n_ISForEdges) { 4964 used_is = ISForEdges[i]; 4965 } else { 4966 used_is = ISForFaces[i-n_ISForEdges]; 4967 } 4968 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4969 total_counts += j; 4970 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4971 } 4972 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4973 4974 /* get local part of global near null space vectors */ 4975 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4976 for (k=0;k<nnsp_size;k++) { 4977 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4978 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4979 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4980 } 4981 4982 /* whether or not to skip lapack calls */ 4983 skip_lapack = PETSC_TRUE; 4984 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4985 4986 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4987 if (!skip_lapack) { 4988 PetscScalar temp_work; 4989 4990 #if defined(PETSC_MISSING_LAPACK_GESVD) 4991 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4992 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4993 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4994 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4995 #if defined(PETSC_USE_COMPLEX) 4996 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4997 #endif 4998 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4999 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5000 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5001 lwork = -1; 5002 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5003 #if !defined(PETSC_USE_COMPLEX) 5004 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5005 #else 5006 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5007 #endif 5008 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5009 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5010 #else /* on missing GESVD */ 5011 /* SVD */ 5012 PetscInt max_n,min_n; 5013 max_n = max_size_of_constraint; 5014 min_n = max_constraints; 5015 if (max_size_of_constraint < max_constraints) { 5016 min_n = max_size_of_constraint; 5017 max_n = max_constraints; 5018 } 5019 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5020 #if defined(PETSC_USE_COMPLEX) 5021 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5022 #endif 5023 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5024 lwork = -1; 5025 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5026 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5027 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5028 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5029 #if !defined(PETSC_USE_COMPLEX) 5030 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5031 #else 5032 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5033 #endif 5034 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5035 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5036 #endif /* on missing GESVD */ 5037 /* Allocate optimal workspace */ 5038 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5039 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5040 } 5041 /* Now we can loop on constraining sets */ 5042 total_counts = 0; 5043 constraints_idxs_ptr[0] = 0; 5044 constraints_data_ptr[0] = 0; 5045 /* vertices */ 5046 if (n_vertices) { 5047 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5048 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5049 for (i=0;i<n_vertices;i++) { 5050 constraints_n[total_counts] = 1; 5051 constraints_data[total_counts] = 1.0; 5052 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5053 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5054 total_counts++; 5055 } 5056 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5057 n_vertices = total_counts; 5058 } 5059 5060 /* edges and faces */ 5061 total_counts_cc = total_counts; 5062 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5063 IS used_is; 5064 PetscBool idxs_copied = PETSC_FALSE; 5065 5066 if (ncc<n_ISForEdges) { 5067 used_is = ISForEdges[ncc]; 5068 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5069 } else { 5070 used_is = ISForFaces[ncc-n_ISForEdges]; 5071 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5072 } 5073 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5074 5075 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5076 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5077 /* change of basis should not be performed on local periodic nodes */ 5078 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5079 if (nnsp_has_cnst) { 5080 PetscScalar quad_value; 5081 5082 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5083 idxs_copied = PETSC_TRUE; 5084 5085 if (!pcbddc->use_nnsp_true) { 5086 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5087 } else { 5088 quad_value = 1.0; 5089 } 5090 for (j=0;j<size_of_constraint;j++) { 5091 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5092 } 5093 temp_constraints++; 5094 total_counts++; 5095 } 5096 for (k=0;k<nnsp_size;k++) { 5097 PetscReal real_value; 5098 PetscScalar *ptr_to_data; 5099 5100 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5101 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5102 for (j=0;j<size_of_constraint;j++) { 5103 ptr_to_data[j] = array[is_indices[j]]; 5104 } 5105 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5106 /* check if array is null on the connected component */ 5107 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5108 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5109 if (real_value > 0.0) { /* keep indices and values */ 5110 temp_constraints++; 5111 total_counts++; 5112 if (!idxs_copied) { 5113 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5114 idxs_copied = PETSC_TRUE; 5115 } 5116 } 5117 } 5118 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5119 valid_constraints = temp_constraints; 5120 if (!pcbddc->use_nnsp_true && temp_constraints) { 5121 if (temp_constraints == 1) { /* just normalize the constraint */ 5122 PetscScalar norm,*ptr_to_data; 5123 5124 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5126 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5127 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5128 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5129 } else { /* perform SVD */ 5130 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5131 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5132 5133 #if defined(PETSC_MISSING_LAPACK_GESVD) 5134 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5135 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5136 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5137 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5138 from that computed using LAPACKgesvd 5139 -> This is due to a different computation of eigenvectors in LAPACKheev 5140 -> The quality of the POD-computed basis will be the same */ 5141 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5142 /* Store upper triangular part of correlation matrix */ 5143 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5144 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5145 for (j=0;j<temp_constraints;j++) { 5146 for (k=0;k<j+1;k++) { 5147 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5148 } 5149 } 5150 /* compute eigenvalues and eigenvectors of correlation matrix */ 5151 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5152 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5153 #if !defined(PETSC_USE_COMPLEX) 5154 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5155 #else 5156 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5157 #endif 5158 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5159 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5160 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5161 j = 0; 5162 while (j < temp_constraints && singular_vals[j] < tol) j++; 5163 total_counts = total_counts-j; 5164 valid_constraints = temp_constraints-j; 5165 /* scale and copy POD basis into used quadrature memory */ 5166 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5167 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5168 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5169 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5170 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5171 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5172 if (j<temp_constraints) { 5173 PetscInt ii; 5174 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5175 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5176 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5177 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5178 for (k=0;k<temp_constraints-j;k++) { 5179 for (ii=0;ii<size_of_constraint;ii++) { 5180 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5181 } 5182 } 5183 } 5184 #else /* on missing GESVD */ 5185 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5186 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5187 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5188 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5189 #if !defined(PETSC_USE_COMPLEX) 5190 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5191 #else 5192 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5193 #endif 5194 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5195 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5196 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5197 k = temp_constraints; 5198 if (k > size_of_constraint) k = size_of_constraint; 5199 j = 0; 5200 while (j < k && singular_vals[k-j-1] < tol) j++; 5201 valid_constraints = k-j; 5202 total_counts = total_counts-temp_constraints+valid_constraints; 5203 #endif /* on missing GESVD */ 5204 } 5205 } 5206 /* update pointers information */ 5207 if (valid_constraints) { 5208 constraints_n[total_counts_cc] = valid_constraints; 5209 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5210 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5211 /* set change_of_basis flag */ 5212 if (boolforchange) { 5213 PetscBTSet(change_basis,total_counts_cc); 5214 } 5215 total_counts_cc++; 5216 } 5217 } 5218 /* free workspace */ 5219 if (!skip_lapack) { 5220 ierr = PetscFree(work);CHKERRQ(ierr); 5221 #if defined(PETSC_USE_COMPLEX) 5222 ierr = PetscFree(rwork);CHKERRQ(ierr); 5223 #endif 5224 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5225 #if defined(PETSC_MISSING_LAPACK_GESVD) 5226 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5227 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5228 #endif 5229 } 5230 for (k=0;k<nnsp_size;k++) { 5231 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5232 } 5233 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5234 /* free index sets of faces, edges and vertices */ 5235 for (i=0;i<n_ISForFaces;i++) { 5236 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5237 } 5238 if (n_ISForFaces) { 5239 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5240 } 5241 for (i=0;i<n_ISForEdges;i++) { 5242 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5243 } 5244 if (n_ISForEdges) { 5245 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5246 } 5247 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5248 } else { 5249 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5250 5251 total_counts = 0; 5252 n_vertices = 0; 5253 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5254 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5255 } 5256 max_constraints = 0; 5257 total_counts_cc = 0; 5258 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5259 total_counts += pcbddc->adaptive_constraints_n[i]; 5260 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5261 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5262 } 5263 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5264 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5265 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5266 constraints_data = pcbddc->adaptive_constraints_data; 5267 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5268 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5269 total_counts_cc = 0; 5270 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5271 if (pcbddc->adaptive_constraints_n[i]) { 5272 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5273 } 5274 } 5275 #if 0 5276 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5277 for (i=0;i<total_counts_cc;i++) { 5278 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5279 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5280 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5281 printf(" %d",constraints_idxs[j]); 5282 } 5283 printf("\n"); 5284 printf("number of cc: %d\n",constraints_n[i]); 5285 } 5286 for (i=0;i<n_vertices;i++) { 5287 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5288 } 5289 for (i=0;i<sub_schurs->n_subs;i++) { 5290 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5291 } 5292 #endif 5293 5294 max_size_of_constraint = 0; 5295 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5296 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5297 /* Change of basis */ 5298 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5299 if (pcbddc->use_change_of_basis) { 5300 for (i=0;i<sub_schurs->n_subs;i++) { 5301 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5302 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5303 } 5304 } 5305 } 5306 } 5307 pcbddc->local_primal_size = total_counts; 5308 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5309 5310 /* map constraints_idxs in boundary numbering */ 5311 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5312 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5313 5314 /* Create constraint matrix */ 5315 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5316 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5317 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5318 5319 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5320 /* determine if a QR strategy is needed for change of basis */ 5321 qr_needed = PETSC_FALSE; 5322 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5323 total_primal_vertices=0; 5324 pcbddc->local_primal_size_cc = 0; 5325 for (i=0;i<total_counts_cc;i++) { 5326 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5327 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5328 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5329 pcbddc->local_primal_size_cc += 1; 5330 } else if (PetscBTLookup(change_basis,i)) { 5331 for (k=0;k<constraints_n[i];k++) { 5332 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5333 } 5334 pcbddc->local_primal_size_cc += constraints_n[i]; 5335 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5336 PetscBTSet(qr_needed_idx,i); 5337 qr_needed = PETSC_TRUE; 5338 } 5339 } else { 5340 pcbddc->local_primal_size_cc += 1; 5341 } 5342 } 5343 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5344 pcbddc->n_vertices = total_primal_vertices; 5345 /* permute indices in order to have a sorted set of vertices */ 5346 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5347 5348 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5349 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5350 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5351 5352 /* nonzero structure of constraint matrix */ 5353 /* and get reference dof for local constraints */ 5354 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5355 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5356 5357 j = total_primal_vertices; 5358 total_counts = total_primal_vertices; 5359 cum = total_primal_vertices; 5360 for (i=n_vertices;i<total_counts_cc;i++) { 5361 if (!PetscBTLookup(change_basis,i)) { 5362 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5363 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5364 cum++; 5365 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5366 for (k=0;k<constraints_n[i];k++) { 5367 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5368 nnz[j+k] = size_of_constraint; 5369 } 5370 j += constraints_n[i]; 5371 } 5372 } 5373 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5374 ierr = PetscFree(nnz);CHKERRQ(ierr); 5375 5376 /* set values in constraint matrix */ 5377 for (i=0;i<total_primal_vertices;i++) { 5378 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5379 } 5380 total_counts = total_primal_vertices; 5381 for (i=n_vertices;i<total_counts_cc;i++) { 5382 if (!PetscBTLookup(change_basis,i)) { 5383 PetscInt *cols; 5384 5385 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5386 cols = constraints_idxs+constraints_idxs_ptr[i]; 5387 for (k=0;k<constraints_n[i];k++) { 5388 PetscInt row = total_counts+k; 5389 PetscScalar *vals; 5390 5391 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5392 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5393 } 5394 total_counts += constraints_n[i]; 5395 } 5396 } 5397 /* assembling */ 5398 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5399 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5400 5401 /* 5402 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5403 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5404 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5405 */ 5406 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5407 if (pcbddc->use_change_of_basis) { 5408 /* dual and primal dofs on a single cc */ 5409 PetscInt dual_dofs,primal_dofs; 5410 /* working stuff for GEQRF */ 5411 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5412 PetscBLASInt lqr_work; 5413 /* working stuff for UNGQR */ 5414 PetscScalar *gqr_work,lgqr_work_t; 5415 PetscBLASInt lgqr_work; 5416 /* working stuff for TRTRS */ 5417 PetscScalar *trs_rhs; 5418 PetscBLASInt Blas_NRHS; 5419 /* pointers for values insertion into change of basis matrix */ 5420 PetscInt *start_rows,*start_cols; 5421 PetscScalar *start_vals; 5422 /* working stuff for values insertion */ 5423 PetscBT is_primal; 5424 PetscInt *aux_primal_numbering_B; 5425 /* matrix sizes */ 5426 PetscInt global_size,local_size; 5427 /* temporary change of basis */ 5428 Mat localChangeOfBasisMatrix; 5429 /* extra space for debugging */ 5430 PetscScalar *dbg_work; 5431 5432 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5433 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5434 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5435 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5436 /* nonzeros for local mat */ 5437 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5438 if (!pcbddc->benign_change || pcbddc->fake_change) { 5439 for (i=0;i<pcis->n;i++) nnz[i]=1; 5440 } else { 5441 const PetscInt *ii; 5442 PetscInt n; 5443 PetscBool flg_row; 5444 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5445 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5446 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5447 } 5448 for (i=n_vertices;i<total_counts_cc;i++) { 5449 if (PetscBTLookup(change_basis,i)) { 5450 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5451 if (PetscBTLookup(qr_needed_idx,i)) { 5452 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5453 } else { 5454 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5455 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5456 } 5457 } 5458 } 5459 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5460 ierr = PetscFree(nnz);CHKERRQ(ierr); 5461 /* Set interior change in the matrix */ 5462 if (!pcbddc->benign_change || pcbddc->fake_change) { 5463 for (i=0;i<pcis->n;i++) { 5464 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5465 } 5466 } else { 5467 const PetscInt *ii,*jj; 5468 PetscScalar *aa; 5469 PetscInt n; 5470 PetscBool flg_row; 5471 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5472 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5473 for (i=0;i<n;i++) { 5474 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5475 } 5476 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5477 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5478 } 5479 5480 if (pcbddc->dbg_flag) { 5481 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5482 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5483 } 5484 5485 5486 /* Now we loop on the constraints which need a change of basis */ 5487 /* 5488 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5489 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5490 5491 Basic blocks of change of basis matrix T computed by 5492 5493 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5494 5495 | 1 0 ... 0 s_1/S | 5496 | 0 1 ... 0 s_2/S | 5497 | ... | 5498 | 0 ... 1 s_{n-1}/S | 5499 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5500 5501 with S = \sum_{i=1}^n s_i^2 5502 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5503 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5504 5505 - QR decomposition of constraints otherwise 5506 */ 5507 if (qr_needed) { 5508 /* space to store Q */ 5509 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5510 /* array to store scaling factors for reflectors */ 5511 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5512 /* first we issue queries for optimal work */ 5513 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5514 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5515 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5516 lqr_work = -1; 5517 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5518 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5519 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5520 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5521 lgqr_work = -1; 5522 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5523 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5524 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5525 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5526 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5527 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5528 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5529 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5530 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5531 /* array to store rhs and solution of triangular solver */ 5532 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5533 /* allocating workspace for check */ 5534 if (pcbddc->dbg_flag) { 5535 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5536 } 5537 } 5538 /* array to store whether a node is primal or not */ 5539 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5540 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5541 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5542 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5543 for (i=0;i<total_primal_vertices;i++) { 5544 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5545 } 5546 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5547 5548 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5549 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5550 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5551 if (PetscBTLookup(change_basis,total_counts)) { 5552 /* get constraint info */ 5553 primal_dofs = constraints_n[total_counts]; 5554 dual_dofs = size_of_constraint-primal_dofs; 5555 5556 if (pcbddc->dbg_flag) { 5557 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5558 } 5559 5560 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5561 5562 /* copy quadrature constraints for change of basis check */ 5563 if (pcbddc->dbg_flag) { 5564 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5565 } 5566 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5567 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5568 5569 /* compute QR decomposition of constraints */ 5570 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5571 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5572 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5573 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5574 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5575 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5576 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5577 5578 /* explictly compute R^-T */ 5579 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5580 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5581 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5582 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5583 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5584 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5585 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5586 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5587 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5588 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5589 5590 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5591 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5592 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5593 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5594 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5595 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5596 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5597 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5598 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5599 5600 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5601 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5602 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5603 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5604 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5605 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5606 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5607 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5609 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5610 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5611 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5612 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5613 5614 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5615 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5616 /* insert cols for primal dofs */ 5617 for (j=0;j<primal_dofs;j++) { 5618 start_vals = &qr_basis[j*size_of_constraint]; 5619 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5620 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5621 } 5622 /* insert cols for dual dofs */ 5623 for (j=0,k=0;j<dual_dofs;k++) { 5624 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5625 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5626 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5627 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5628 j++; 5629 } 5630 } 5631 5632 /* check change of basis */ 5633 if (pcbddc->dbg_flag) { 5634 PetscInt ii,jj; 5635 PetscBool valid_qr=PETSC_TRUE; 5636 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5637 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5638 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5639 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5640 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5641 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5642 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5643 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5644 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5645 for (jj=0;jj<size_of_constraint;jj++) { 5646 for (ii=0;ii<primal_dofs;ii++) { 5647 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5648 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5649 } 5650 } 5651 if (!valid_qr) { 5652 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5653 for (jj=0;jj<size_of_constraint;jj++) { 5654 for (ii=0;ii<primal_dofs;ii++) { 5655 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5656 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5657 } 5658 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5659 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5660 } 5661 } 5662 } 5663 } else { 5664 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5665 } 5666 } 5667 } else { /* simple transformation block */ 5668 PetscInt row,col; 5669 PetscScalar val,norm; 5670 5671 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5672 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5673 for (j=0;j<size_of_constraint;j++) { 5674 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5675 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5676 if (!PetscBTLookup(is_primal,row_B)) { 5677 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5678 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5679 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5680 } else { 5681 for (k=0;k<size_of_constraint;k++) { 5682 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5683 if (row != col) { 5684 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5685 } else { 5686 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5687 } 5688 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5689 } 5690 } 5691 } 5692 if (pcbddc->dbg_flag) { 5693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5694 } 5695 } 5696 } else { 5697 if (pcbddc->dbg_flag) { 5698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5699 } 5700 } 5701 } 5702 5703 /* free workspace */ 5704 if (qr_needed) { 5705 if (pcbddc->dbg_flag) { 5706 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5707 } 5708 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5709 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5710 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5711 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5712 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5713 } 5714 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5715 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5716 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5717 5718 /* assembling of global change of variable */ 5719 if (!pcbddc->fake_change) { 5720 Mat tmat; 5721 PetscInt bs; 5722 5723 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5724 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5725 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5726 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5727 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5728 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5729 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5730 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5731 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5732 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5733 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5734 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5735 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5736 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5737 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5738 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5739 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5740 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5741 5742 /* check */ 5743 if (pcbddc->dbg_flag) { 5744 PetscReal error; 5745 Vec x,x_change; 5746 5747 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5748 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5749 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5750 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5751 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5752 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5753 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5754 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5755 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5756 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5757 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5758 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5759 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5760 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5761 ierr = VecDestroy(&x);CHKERRQ(ierr); 5762 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5763 } 5764 /* adapt sub_schurs computed (if any) */ 5765 if (pcbddc->use_deluxe_scaling) { 5766 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5767 5768 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5769 if (sub_schurs && sub_schurs->S_Ej_all) { 5770 Mat S_new,tmat; 5771 IS is_all_N,is_V_Sall = NULL; 5772 5773 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5774 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5775 if (pcbddc->deluxe_zerorows) { 5776 ISLocalToGlobalMapping NtoSall; 5777 IS is_V; 5778 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5779 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5780 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5781 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5782 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5783 } 5784 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5785 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5786 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5787 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5788 if (pcbddc->deluxe_zerorows) { 5789 const PetscScalar *array; 5790 const PetscInt *idxs_V,*idxs_all; 5791 PetscInt i,n_V; 5792 5793 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5794 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5795 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5796 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5797 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5798 for (i=0;i<n_V;i++) { 5799 PetscScalar val; 5800 PetscInt idx; 5801 5802 idx = idxs_V[i]; 5803 val = array[idxs_all[idxs_V[i]]]; 5804 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5805 } 5806 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5807 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5808 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5809 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5810 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5811 } 5812 sub_schurs->S_Ej_all = S_new; 5813 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5814 if (sub_schurs->sum_S_Ej_all) { 5815 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5816 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5817 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5818 if (pcbddc->deluxe_zerorows) { 5819 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5820 } 5821 sub_schurs->sum_S_Ej_all = S_new; 5822 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5823 } 5824 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5825 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5826 } 5827 /* destroy any change of basis context in sub_schurs */ 5828 if (sub_schurs && sub_schurs->change) { 5829 PetscInt i; 5830 5831 for (i=0;i<sub_schurs->n_subs;i++) { 5832 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5833 } 5834 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5835 } 5836 } 5837 if (pcbddc->switch_static) { /* need to save the local change */ 5838 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5839 } else { 5840 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5841 } 5842 /* determine if any process has changed the pressures locally */ 5843 pcbddc->change_interior = pcbddc->benign_have_null; 5844 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5845 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5846 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5847 pcbddc->use_qr_single = qr_needed; 5848 } 5849 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5850 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5851 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5852 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5853 } else { 5854 Mat benign_global = NULL; 5855 if (pcbddc->benign_have_null) { 5856 Mat tmat; 5857 5858 pcbddc->change_interior = PETSC_TRUE; 5859 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5860 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5861 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5862 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5863 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5864 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5865 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5866 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5867 if (pcbddc->benign_change) { 5868 Mat M; 5869 5870 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5871 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5872 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5873 ierr = MatDestroy(&M);CHKERRQ(ierr); 5874 } else { 5875 Mat eye; 5876 PetscScalar *array; 5877 5878 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5879 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5880 for (i=0;i<pcis->n;i++) { 5881 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5882 } 5883 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5884 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5885 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5886 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5887 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5888 } 5889 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5890 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5891 } 5892 if (pcbddc->user_ChangeOfBasisMatrix) { 5893 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5894 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5895 } else if (pcbddc->benign_have_null) { 5896 pcbddc->ChangeOfBasisMatrix = benign_global; 5897 } 5898 } 5899 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5900 IS is_global; 5901 const PetscInt *gidxs; 5902 5903 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5904 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5905 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5906 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5907 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5908 } 5909 } 5910 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5911 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5912 } 5913 5914 if (!pcbddc->fake_change) { 5915 /* add pressure dofs to set of primal nodes for numbering purposes */ 5916 for (i=0;i<pcbddc->benign_n;i++) { 5917 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5918 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5919 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5920 pcbddc->local_primal_size_cc++; 5921 pcbddc->local_primal_size++; 5922 } 5923 5924 /* check if a new primal space has been introduced (also take into account benign trick) */ 5925 pcbddc->new_primal_space_local = PETSC_TRUE; 5926 if (olocal_primal_size == pcbddc->local_primal_size) { 5927 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5928 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5929 if (!pcbddc->new_primal_space_local) { 5930 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5931 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5932 } 5933 } 5934 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5935 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5936 } 5937 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5938 5939 /* flush dbg viewer */ 5940 if (pcbddc->dbg_flag) { 5941 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5942 } 5943 5944 /* free workspace */ 5945 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5946 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5947 if (!pcbddc->adaptive_selection) { 5948 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5949 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5950 } else { 5951 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5952 pcbddc->adaptive_constraints_idxs_ptr, 5953 pcbddc->adaptive_constraints_data_ptr, 5954 pcbddc->adaptive_constraints_idxs, 5955 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5956 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5957 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5958 } 5959 PetscFunctionReturn(0); 5960 } 5961 5962 #undef __FUNCT__ 5963 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5964 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5965 { 5966 ISLocalToGlobalMapping map; 5967 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5968 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5969 PetscInt ierr,i,N; 5970 5971 PetscFunctionBegin; 5972 if (pcbddc->recompute_topography) { 5973 pcbddc->graphanalyzed = PETSC_FALSE; 5974 /* Reset previously computed graph */ 5975 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5976 /* Init local Graph struct */ 5977 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5978 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5979 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5980 5981 /* Check validity of the csr graph passed in by the user */ 5982 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5983 5984 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5985 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5986 PetscInt *xadj,*adjncy; 5987 PetscInt nvtxs; 5988 PetscBool flg_row=PETSC_FALSE; 5989 5990 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5991 if (flg_row) { 5992 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5993 pcbddc->computed_rowadj = PETSC_TRUE; 5994 } 5995 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5996 } 5997 if (pcbddc->dbg_flag) { 5998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5999 } 6000 6001 /* Setup of Graph */ 6002 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6003 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6004 6005 /* attach info on disconnected subdomains if present */ 6006 if (pcbddc->n_local_subs) { 6007 PetscInt *local_subs; 6008 6009 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6010 for (i=0;i<pcbddc->n_local_subs;i++) { 6011 const PetscInt *idxs; 6012 PetscInt nl,j; 6013 6014 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6015 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6016 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6017 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6018 } 6019 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6020 pcbddc->mat_graph->local_subs = local_subs; 6021 } 6022 } 6023 6024 if (!pcbddc->graphanalyzed) { 6025 /* Graph's connected components analysis */ 6026 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6027 pcbddc->graphanalyzed = PETSC_TRUE; 6028 } 6029 PetscFunctionReturn(0); 6030 } 6031 6032 #undef __FUNCT__ 6033 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6034 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6035 { 6036 PetscInt i,j; 6037 PetscScalar *alphas; 6038 PetscErrorCode ierr; 6039 6040 PetscFunctionBegin; 6041 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6042 for (i=0;i<n;i++) { 6043 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6044 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6045 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6046 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6047 } 6048 ierr = PetscFree(alphas);CHKERRQ(ierr); 6049 PetscFunctionReturn(0); 6050 } 6051 6052 #undef __FUNCT__ 6053 #define __FUNCT__ "MatISGetSubassemblingPattern" 6054 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6055 { 6056 Mat A; 6057 PetscInt n_neighs,*neighs,*n_shared,**shared; 6058 PetscMPIInt size,rank,color; 6059 PetscInt *xadj,*adjncy; 6060 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6061 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 6062 PetscInt void_procs,*procs_candidates = NULL; 6063 PetscInt xadj_count, *count; 6064 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6065 PetscSubcomm psubcomm; 6066 MPI_Comm subcomm; 6067 PetscErrorCode ierr; 6068 6069 PetscFunctionBegin; 6070 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6071 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6072 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6073 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6074 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6075 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6076 6077 if (have_void) *have_void = PETSC_FALSE; 6078 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6079 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6080 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6081 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6082 im_active = !!(n); 6083 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6084 void_procs = size - active_procs; 6085 /* get ranks of of non-active processes in mat communicator */ 6086 if (void_procs) { 6087 PetscInt ncand; 6088 6089 if (have_void) *have_void = PETSC_TRUE; 6090 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6091 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6092 for (i=0,ncand=0;i<size;i++) { 6093 if (!procs_candidates[i]) { 6094 procs_candidates[ncand++] = i; 6095 } 6096 } 6097 /* force n_subdomains to be not greater that the number of non-active processes */ 6098 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6099 } 6100 6101 /* number of subdomains requested greater than active processes -> just shift the matrix 6102 number of subdomains requested 1 -> send to master or first candidate in voids */ 6103 if (active_procs < *n_subdomains || *n_subdomains == 1) { 6104 PetscInt issize,isidx,dest; 6105 if (*n_subdomains == 1) dest = 0; 6106 else dest = rank; 6107 if (im_active) { 6108 issize = 1; 6109 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6110 isidx = procs_candidates[dest]; 6111 } else { 6112 isidx = dest; 6113 } 6114 } else { 6115 issize = 0; 6116 isidx = -1; 6117 } 6118 *n_subdomains = active_procs; 6119 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6120 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6121 PetscFunctionReturn(0); 6122 } 6123 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6124 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6125 threshold = PetscMax(threshold,2); 6126 6127 /* Get info on mapping */ 6128 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6129 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6130 6131 /* build local CSR graph of subdomains' connectivity */ 6132 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6133 xadj[0] = 0; 6134 xadj[1] = PetscMax(n_neighs-1,0); 6135 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6136 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6137 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6138 for (i=1;i<n_neighs;i++) 6139 for (j=0;j<n_shared[i];j++) 6140 count[shared[i][j]] += 1; 6141 6142 xadj_count = 0; 6143 for (i=1;i<n_neighs;i++) { 6144 for (j=0;j<n_shared[i];j++) { 6145 if (count[shared[i][j]] < threshold) { 6146 adjncy[xadj_count] = neighs[i]; 6147 adjncy_wgt[xadj_count] = n_shared[i]; 6148 xadj_count++; 6149 break; 6150 } 6151 } 6152 } 6153 xadj[1] = xadj_count; 6154 ierr = PetscFree(count);CHKERRQ(ierr); 6155 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6156 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6157 6158 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6159 6160 /* Restrict work on active processes only */ 6161 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6162 if (void_procs) { 6163 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6164 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6165 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6166 subcomm = PetscSubcommChild(psubcomm); 6167 } else { 6168 psubcomm = NULL; 6169 subcomm = PetscObjectComm((PetscObject)mat); 6170 } 6171 6172 v_wgt = NULL; 6173 if (!color) { 6174 ierr = PetscFree(xadj);CHKERRQ(ierr); 6175 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6176 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6177 } else { 6178 Mat subdomain_adj; 6179 IS new_ranks,new_ranks_contig; 6180 MatPartitioning partitioner; 6181 PetscInt rstart=0,rend=0; 6182 PetscInt *is_indices,*oldranks; 6183 PetscMPIInt size; 6184 PetscBool aggregate; 6185 6186 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6187 if (void_procs) { 6188 PetscInt prank = rank; 6189 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6190 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6191 for (i=0;i<xadj[1];i++) { 6192 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6193 } 6194 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6195 } else { 6196 oldranks = NULL; 6197 } 6198 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6199 if (aggregate) { /* TODO: all this part could be made more efficient */ 6200 PetscInt lrows,row,ncols,*cols; 6201 PetscMPIInt nrank; 6202 PetscScalar *vals; 6203 6204 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6205 lrows = 0; 6206 if (nrank<redprocs) { 6207 lrows = size/redprocs; 6208 if (nrank<size%redprocs) lrows++; 6209 } 6210 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6211 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6212 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6213 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6214 row = nrank; 6215 ncols = xadj[1]-xadj[0]; 6216 cols = adjncy; 6217 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6218 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6219 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6220 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6221 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6222 ierr = PetscFree(xadj);CHKERRQ(ierr); 6223 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6224 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6225 ierr = PetscFree(vals);CHKERRQ(ierr); 6226 if (use_vwgt) { 6227 Vec v; 6228 const PetscScalar *array; 6229 PetscInt nl; 6230 6231 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6232 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6233 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6234 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6235 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6236 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6237 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6238 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6239 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6240 ierr = VecDestroy(&v);CHKERRQ(ierr); 6241 } 6242 } else { 6243 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6244 if (use_vwgt) { 6245 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6246 v_wgt[0] = local_size; 6247 } 6248 } 6249 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6250 6251 /* Partition */ 6252 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6253 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6254 if (v_wgt) { 6255 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6256 } 6257 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6258 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6259 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6260 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6261 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6262 6263 /* renumber new_ranks to avoid "holes" in new set of processors */ 6264 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6265 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6266 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6267 if (!aggregate) { 6268 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6269 #if defined(PETSC_USE_DEBUG) 6270 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6271 #endif 6272 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6273 } else if (oldranks) { 6274 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6275 } else { 6276 ranks_send_to_idx[0] = is_indices[0]; 6277 } 6278 } else { 6279 PetscInt idxs[1]; 6280 PetscMPIInt tag; 6281 MPI_Request *reqs; 6282 6283 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6284 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6285 for (i=rstart;i<rend;i++) { 6286 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6287 } 6288 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6289 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6290 ierr = PetscFree(reqs);CHKERRQ(ierr); 6291 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6292 #if defined(PETSC_USE_DEBUG) 6293 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6294 #endif 6295 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6296 } else if (oldranks) { 6297 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6298 } else { 6299 ranks_send_to_idx[0] = idxs[0]; 6300 } 6301 } 6302 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6303 /* clean up */ 6304 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6305 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6306 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6307 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6308 } 6309 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6310 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6311 6312 /* assemble parallel IS for sends */ 6313 i = 1; 6314 if (!color) i=0; 6315 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6316 PetscFunctionReturn(0); 6317 } 6318 6319 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6320 6321 #undef __FUNCT__ 6322 #define __FUNCT__ "MatISSubassemble" 6323 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6324 { 6325 Mat local_mat; 6326 IS is_sends_internal; 6327 PetscInt rows,cols,new_local_rows; 6328 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6329 PetscBool ismatis,isdense,newisdense,destroy_mat; 6330 ISLocalToGlobalMapping l2gmap; 6331 PetscInt* l2gmap_indices; 6332 const PetscInt* is_indices; 6333 MatType new_local_type; 6334 /* buffers */ 6335 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6336 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6337 PetscInt *recv_buffer_idxs_local; 6338 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6339 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6340 /* MPI */ 6341 MPI_Comm comm,comm_n; 6342 PetscSubcomm subcomm; 6343 PetscMPIInt n_sends,n_recvs,commsize; 6344 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6345 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6346 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6347 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6348 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6349 PetscErrorCode ierr; 6350 6351 PetscFunctionBegin; 6352 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6353 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6354 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6355 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6356 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6357 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6358 PetscValidLogicalCollectiveBool(mat,reuse,6); 6359 PetscValidLogicalCollectiveInt(mat,nis,8); 6360 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6361 if (nvecs) { 6362 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6363 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6364 } 6365 /* further checks */ 6366 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6367 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6368 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6369 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6370 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6371 if (reuse && *mat_n) { 6372 PetscInt mrows,mcols,mnrows,mncols; 6373 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6374 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6375 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6376 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6377 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6378 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6379 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6380 } 6381 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6382 PetscValidLogicalCollectiveInt(mat,bs,0); 6383 6384 /* prepare IS for sending if not provided */ 6385 if (!is_sends) { 6386 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6387 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6388 } else { 6389 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6390 is_sends_internal = is_sends; 6391 } 6392 6393 /* get comm */ 6394 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6395 6396 /* compute number of sends */ 6397 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6398 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6399 6400 /* compute number of receives */ 6401 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6402 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6403 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6404 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6405 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6406 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6407 ierr = PetscFree(iflags);CHKERRQ(ierr); 6408 6409 /* restrict comm if requested */ 6410 subcomm = 0; 6411 destroy_mat = PETSC_FALSE; 6412 if (restrict_comm) { 6413 PetscMPIInt color,subcommsize; 6414 6415 color = 0; 6416 if (restrict_full) { 6417 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6418 } else { 6419 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6420 } 6421 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6422 subcommsize = commsize - subcommsize; 6423 /* check if reuse has been requested */ 6424 if (reuse) { 6425 if (*mat_n) { 6426 PetscMPIInt subcommsize2; 6427 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6428 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6429 comm_n = PetscObjectComm((PetscObject)*mat_n); 6430 } else { 6431 comm_n = PETSC_COMM_SELF; 6432 } 6433 } else { /* MAT_INITIAL_MATRIX */ 6434 PetscMPIInt rank; 6435 6436 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6437 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6438 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6439 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6440 comm_n = PetscSubcommChild(subcomm); 6441 } 6442 /* flag to destroy *mat_n if not significative */ 6443 if (color) destroy_mat = PETSC_TRUE; 6444 } else { 6445 comm_n = comm; 6446 } 6447 6448 /* prepare send/receive buffers */ 6449 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6450 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6451 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6452 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6453 if (nis) { 6454 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6455 } 6456 6457 /* Get data from local matrices */ 6458 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6459 /* TODO: See below some guidelines on how to prepare the local buffers */ 6460 /* 6461 send_buffer_vals should contain the raw values of the local matrix 6462 send_buffer_idxs should contain: 6463 - MatType_PRIVATE type 6464 - PetscInt size_of_l2gmap 6465 - PetscInt global_row_indices[size_of_l2gmap] 6466 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6467 */ 6468 else { 6469 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6470 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6471 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6472 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6473 send_buffer_idxs[1] = i; 6474 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6475 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6476 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6477 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6478 for (i=0;i<n_sends;i++) { 6479 ilengths_vals[is_indices[i]] = len*len; 6480 ilengths_idxs[is_indices[i]] = len+2; 6481 } 6482 } 6483 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6484 /* additional is (if any) */ 6485 if (nis) { 6486 PetscMPIInt psum; 6487 PetscInt j; 6488 for (j=0,psum=0;j<nis;j++) { 6489 PetscInt plen; 6490 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6491 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6492 psum += len+1; /* indices + lenght */ 6493 } 6494 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6495 for (j=0,psum=0;j<nis;j++) { 6496 PetscInt plen; 6497 const PetscInt *is_array_idxs; 6498 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6499 send_buffer_idxs_is[psum] = plen; 6500 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6501 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6502 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6503 psum += plen+1; /* indices + lenght */ 6504 } 6505 for (i=0;i<n_sends;i++) { 6506 ilengths_idxs_is[is_indices[i]] = psum; 6507 } 6508 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6509 } 6510 6511 buf_size_idxs = 0; 6512 buf_size_vals = 0; 6513 buf_size_idxs_is = 0; 6514 buf_size_vecs = 0; 6515 for (i=0;i<n_recvs;i++) { 6516 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6517 buf_size_vals += (PetscInt)olengths_vals[i]; 6518 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6519 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6520 } 6521 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6522 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6523 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6524 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6525 6526 /* get new tags for clean communications */ 6527 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6528 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6529 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6530 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6531 6532 /* allocate for requests */ 6533 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6534 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6535 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6536 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6537 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6538 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6539 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6540 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6541 6542 /* communications */ 6543 ptr_idxs = recv_buffer_idxs; 6544 ptr_vals = recv_buffer_vals; 6545 ptr_idxs_is = recv_buffer_idxs_is; 6546 ptr_vecs = recv_buffer_vecs; 6547 for (i=0;i<n_recvs;i++) { 6548 source_dest = onodes[i]; 6549 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6550 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6551 ptr_idxs += olengths_idxs[i]; 6552 ptr_vals += olengths_vals[i]; 6553 if (nis) { 6554 source_dest = onodes_is[i]; 6555 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6556 ptr_idxs_is += olengths_idxs_is[i]; 6557 } 6558 if (nvecs) { 6559 source_dest = onodes[i]; 6560 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6561 ptr_vecs += olengths_idxs[i]-2; 6562 } 6563 } 6564 for (i=0;i<n_sends;i++) { 6565 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6566 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6567 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6568 if (nis) { 6569 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6570 } 6571 if (nvecs) { 6572 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6573 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6574 } 6575 } 6576 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6577 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6578 6579 /* assemble new l2g map */ 6580 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6581 ptr_idxs = recv_buffer_idxs; 6582 new_local_rows = 0; 6583 for (i=0;i<n_recvs;i++) { 6584 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6585 ptr_idxs += olengths_idxs[i]; 6586 } 6587 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6588 ptr_idxs = recv_buffer_idxs; 6589 new_local_rows = 0; 6590 for (i=0;i<n_recvs;i++) { 6591 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6592 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6593 ptr_idxs += olengths_idxs[i]; 6594 } 6595 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6596 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6597 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6598 6599 /* infer new local matrix type from received local matrices type */ 6600 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6601 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6602 if (n_recvs) { 6603 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6604 ptr_idxs = recv_buffer_idxs; 6605 for (i=0;i<n_recvs;i++) { 6606 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6607 new_local_type_private = MATAIJ_PRIVATE; 6608 break; 6609 } 6610 ptr_idxs += olengths_idxs[i]; 6611 } 6612 switch (new_local_type_private) { 6613 case MATDENSE_PRIVATE: 6614 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6615 new_local_type = MATSEQAIJ; 6616 bs = 1; 6617 } else { /* if I receive only 1 dense matrix */ 6618 new_local_type = MATSEQDENSE; 6619 bs = 1; 6620 } 6621 break; 6622 case MATAIJ_PRIVATE: 6623 new_local_type = MATSEQAIJ; 6624 bs = 1; 6625 break; 6626 case MATBAIJ_PRIVATE: 6627 new_local_type = MATSEQBAIJ; 6628 break; 6629 case MATSBAIJ_PRIVATE: 6630 new_local_type = MATSEQSBAIJ; 6631 break; 6632 default: 6633 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6634 break; 6635 } 6636 } else { /* by default, new_local_type is seqdense */ 6637 new_local_type = MATSEQDENSE; 6638 bs = 1; 6639 } 6640 6641 /* create MATIS object if needed */ 6642 if (!reuse) { 6643 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6644 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6645 } else { 6646 /* it also destroys the local matrices */ 6647 if (*mat_n) { 6648 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6649 } else { /* this is a fake object */ 6650 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6651 } 6652 } 6653 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6654 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6655 6656 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6657 6658 /* Global to local map of received indices */ 6659 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6660 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6661 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6662 6663 /* restore attributes -> type of incoming data and its size */ 6664 buf_size_idxs = 0; 6665 for (i=0;i<n_recvs;i++) { 6666 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6667 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6668 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6669 } 6670 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6671 6672 /* set preallocation */ 6673 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6674 if (!newisdense) { 6675 PetscInt *new_local_nnz=0; 6676 6677 ptr_idxs = recv_buffer_idxs_local; 6678 if (n_recvs) { 6679 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6680 } 6681 for (i=0;i<n_recvs;i++) { 6682 PetscInt j; 6683 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6684 for (j=0;j<*(ptr_idxs+1);j++) { 6685 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6686 } 6687 } else { 6688 /* TODO */ 6689 } 6690 ptr_idxs += olengths_idxs[i]; 6691 } 6692 if (new_local_nnz) { 6693 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6694 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6695 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6696 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6697 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6698 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6699 } else { 6700 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6701 } 6702 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6703 } else { 6704 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6705 } 6706 6707 /* set values */ 6708 ptr_vals = recv_buffer_vals; 6709 ptr_idxs = recv_buffer_idxs_local; 6710 for (i=0;i<n_recvs;i++) { 6711 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6712 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6713 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6714 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6715 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6716 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6717 } else { 6718 /* TODO */ 6719 } 6720 ptr_idxs += olengths_idxs[i]; 6721 ptr_vals += olengths_vals[i]; 6722 } 6723 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6724 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6725 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6726 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6727 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6728 6729 #if 0 6730 if (!restrict_comm) { /* check */ 6731 Vec lvec,rvec; 6732 PetscReal infty_error; 6733 6734 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6735 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6736 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6737 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6738 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6739 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6740 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6741 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6742 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6743 } 6744 #endif 6745 6746 /* assemble new additional is (if any) */ 6747 if (nis) { 6748 PetscInt **temp_idxs,*count_is,j,psum; 6749 6750 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6751 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6752 ptr_idxs = recv_buffer_idxs_is; 6753 psum = 0; 6754 for (i=0;i<n_recvs;i++) { 6755 for (j=0;j<nis;j++) { 6756 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6757 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6758 psum += plen; 6759 ptr_idxs += plen+1; /* shift pointer to received data */ 6760 } 6761 } 6762 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6763 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6764 for (i=1;i<nis;i++) { 6765 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6766 } 6767 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6768 ptr_idxs = recv_buffer_idxs_is; 6769 for (i=0;i<n_recvs;i++) { 6770 for (j=0;j<nis;j++) { 6771 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6772 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6773 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6774 ptr_idxs += plen+1; /* shift pointer to received data */ 6775 } 6776 } 6777 for (i=0;i<nis;i++) { 6778 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6779 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6780 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6781 } 6782 ierr = PetscFree(count_is);CHKERRQ(ierr); 6783 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6784 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6785 } 6786 /* free workspace */ 6787 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6788 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6789 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6790 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6791 if (isdense) { 6792 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6793 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6794 } else { 6795 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6796 } 6797 if (nis) { 6798 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6799 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6800 } 6801 6802 if (nvecs) { 6803 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6804 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6805 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6806 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6807 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6808 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6809 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6810 /* set values */ 6811 ptr_vals = recv_buffer_vecs; 6812 ptr_idxs = recv_buffer_idxs_local; 6813 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6814 for (i=0;i<n_recvs;i++) { 6815 PetscInt j; 6816 for (j=0;j<*(ptr_idxs+1);j++) { 6817 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6818 } 6819 ptr_idxs += olengths_idxs[i]; 6820 ptr_vals += olengths_idxs[i]-2; 6821 } 6822 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6823 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6824 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6825 } 6826 6827 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6828 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6829 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6830 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6831 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6832 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6833 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6834 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6835 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6836 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6837 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6838 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6839 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6840 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6841 ierr = PetscFree(onodes);CHKERRQ(ierr); 6842 if (nis) { 6843 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6844 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6845 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6846 } 6847 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6848 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6849 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6850 for (i=0;i<nis;i++) { 6851 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6852 } 6853 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6854 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6855 } 6856 *mat_n = NULL; 6857 } 6858 PetscFunctionReturn(0); 6859 } 6860 6861 /* temporary hack into ksp private data structure */ 6862 #include <petsc/private/kspimpl.h> 6863 6864 #undef __FUNCT__ 6865 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6866 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6867 { 6868 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6869 PC_IS *pcis = (PC_IS*)pc->data; 6870 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6871 Mat coarsedivudotp = NULL; 6872 MatNullSpace CoarseNullSpace = NULL; 6873 ISLocalToGlobalMapping coarse_islg; 6874 IS coarse_is,*isarray; 6875 PetscInt i,im_active=-1,active_procs=-1; 6876 PetscInt nis,nisdofs,nisneu,nisvert; 6877 PC pc_temp; 6878 PCType coarse_pc_type; 6879 KSPType coarse_ksp_type; 6880 PetscBool multilevel_requested,multilevel_allowed; 6881 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6882 Mat t_coarse_mat_is; 6883 PetscInt ncoarse; 6884 PetscBool compute_vecs = PETSC_FALSE; 6885 PetscScalar *array; 6886 MatReuse coarse_mat_reuse; 6887 PetscBool restr, full_restr, have_void; 6888 PetscErrorCode ierr; 6889 6890 PetscFunctionBegin; 6891 /* Assign global numbering to coarse dofs */ 6892 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6893 PetscInt ocoarse_size; 6894 compute_vecs = PETSC_TRUE; 6895 ocoarse_size = pcbddc->coarse_size; 6896 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6897 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6898 /* see if we can avoid some work */ 6899 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6900 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6901 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6902 PC pc; 6903 PetscBool isbddc; 6904 6905 /* temporary workaround since PCBDDC does not have a reset method so far */ 6906 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6907 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6908 if (isbddc) { 6909 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6910 } else { 6911 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6912 } 6913 coarse_reuse = PETSC_FALSE; 6914 } else { /* we can safely reuse already computed coarse matrix */ 6915 coarse_reuse = PETSC_TRUE; 6916 } 6917 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6918 coarse_reuse = PETSC_FALSE; 6919 } 6920 /* reset any subassembling information */ 6921 if (!coarse_reuse || pcbddc->recompute_topography) { 6922 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6923 } 6924 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6925 coarse_reuse = PETSC_TRUE; 6926 } 6927 /* assemble coarse matrix */ 6928 if (coarse_reuse && pcbddc->coarse_ksp) { 6929 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6930 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6931 coarse_mat_reuse = MAT_REUSE_MATRIX; 6932 } else { 6933 coarse_mat = NULL; 6934 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6935 } 6936 6937 /* creates temporary l2gmap and IS for coarse indexes */ 6938 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6939 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6940 6941 /* creates temporary MATIS object for coarse matrix */ 6942 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6943 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6944 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6945 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6946 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6947 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6948 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6949 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6950 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6951 6952 /* count "active" (i.e. with positive local size) and "void" processes */ 6953 im_active = !!(pcis->n); 6954 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6955 6956 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6957 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6958 /* full_restr : just use the receivers from the subassembling pattern */ 6959 coarse_mat_is = NULL; 6960 multilevel_allowed = PETSC_FALSE; 6961 multilevel_requested = PETSC_FALSE; 6962 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6963 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6964 if (multilevel_requested) { 6965 ncoarse = active_procs/pcbddc->coarsening_ratio; 6966 restr = PETSC_FALSE; 6967 full_restr = PETSC_FALSE; 6968 } else { 6969 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6970 restr = PETSC_TRUE; 6971 full_restr = PETSC_TRUE; 6972 } 6973 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6974 ncoarse = PetscMax(1,ncoarse); 6975 if (!pcbddc->coarse_subassembling) { 6976 if (pcbddc->coarsening_ratio > 1) { 6977 ierr = MatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6978 } else { 6979 PetscMPIInt size,rank; 6980 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6981 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6982 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6983 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6984 } 6985 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6986 PetscInt psum; 6987 PetscMPIInt size; 6988 if (pcbddc->coarse_ksp) psum = 1; 6989 else psum = 0; 6990 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6991 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6992 if (ncoarse < size) have_void = PETSC_TRUE; 6993 } 6994 /* determine if we can go multilevel */ 6995 if (multilevel_requested) { 6996 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6997 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6998 } 6999 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7000 7001 /* dump subassembling pattern */ 7002 if (pcbddc->dbg_flag && multilevel_allowed) { 7003 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7004 } 7005 7006 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7007 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 7008 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7009 const PetscInt *idxs; 7010 ISLocalToGlobalMapping tmap; 7011 7012 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7013 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7014 /* allocate space for temporary storage */ 7015 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7016 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7017 /* allocate for IS array */ 7018 nisdofs = pcbddc->n_ISForDofsLocal; 7019 nisneu = !!pcbddc->NeumannBoundariesLocal; 7020 nisvert = 0; /* nisvert is not used */ 7021 nis = nisdofs + nisneu + nisvert; 7022 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7023 /* dofs splitting */ 7024 for (i=0;i<nisdofs;i++) { 7025 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7026 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7027 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7028 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7029 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7030 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7031 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7032 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7033 } 7034 /* neumann boundaries */ 7035 if (pcbddc->NeumannBoundariesLocal) { 7036 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7037 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7038 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7039 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7040 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7041 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7042 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7043 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7044 } 7045 /* free memory */ 7046 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7047 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7048 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7049 } else { 7050 nis = 0; 7051 nisdofs = 0; 7052 nisneu = 0; 7053 nisvert = 0; 7054 isarray = NULL; 7055 } 7056 /* destroy no longer needed map */ 7057 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7058 7059 /* subassemble */ 7060 if (multilevel_allowed) { 7061 Vec vp[1]; 7062 PetscInt nvecs = 0; 7063 PetscBool reuse,reuser; 7064 7065 if (coarse_mat) reuse = PETSC_TRUE; 7066 else reuse = PETSC_FALSE; 7067 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7068 vp[0] = NULL; 7069 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7070 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7071 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7072 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7073 nvecs = 1; 7074 7075 if (pcbddc->divudotp) { 7076 Mat B,loc_divudotp; 7077 Vec v,p; 7078 IS dummy; 7079 PetscInt np; 7080 7081 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7082 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7083 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7084 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7085 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7086 ierr = VecSet(p,1.);CHKERRQ(ierr); 7087 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7088 ierr = VecDestroy(&p);CHKERRQ(ierr); 7089 ierr = MatDestroy(&B);CHKERRQ(ierr); 7090 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7091 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7092 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7093 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7094 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7095 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7096 ierr = VecDestroy(&v);CHKERRQ(ierr); 7097 } 7098 } 7099 if (reuser) { 7100 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7101 } else { 7102 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7103 } 7104 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7105 PetscScalar *arraym,*arrayv; 7106 PetscInt nl; 7107 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7108 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7109 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7110 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7111 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7112 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7113 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7114 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7115 } else { 7116 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7117 } 7118 } else { 7119 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 7120 } 7121 if (coarse_mat_is || coarse_mat) { 7122 PetscMPIInt size; 7123 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7124 if (!multilevel_allowed) { 7125 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7126 } else { 7127 Mat A; 7128 7129 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7130 if (coarse_mat_is) { 7131 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7132 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7133 coarse_mat = coarse_mat_is; 7134 } 7135 /* be sure we don't have MatSeqDENSE as local mat */ 7136 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7137 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7138 } 7139 } 7140 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7141 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7142 7143 /* create local to global scatters for coarse problem */ 7144 if (compute_vecs) { 7145 PetscInt lrows; 7146 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7147 if (coarse_mat) { 7148 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7149 } else { 7150 lrows = 0; 7151 } 7152 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7153 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7154 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7155 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7156 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7157 } 7158 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7159 7160 /* set defaults for coarse KSP and PC */ 7161 if (multilevel_allowed) { 7162 coarse_ksp_type = KSPRICHARDSON; 7163 coarse_pc_type = PCBDDC; 7164 } else { 7165 coarse_ksp_type = KSPPREONLY; 7166 coarse_pc_type = PCREDUNDANT; 7167 } 7168 7169 /* print some info if requested */ 7170 if (pcbddc->dbg_flag) { 7171 if (!multilevel_allowed) { 7172 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7173 if (multilevel_requested) { 7174 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7175 } else if (pcbddc->max_levels) { 7176 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7177 } 7178 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7179 } 7180 } 7181 7182 /* create the coarse KSP object only once with defaults */ 7183 if (coarse_mat) { 7184 PetscViewer dbg_viewer = NULL; 7185 if (pcbddc->dbg_flag) { 7186 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7187 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7188 } 7189 if (!pcbddc->coarse_ksp) { 7190 char prefix[256],str_level[16]; 7191 size_t len; 7192 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7193 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7194 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7195 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7196 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7197 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7198 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7199 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7200 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7201 /* prefix */ 7202 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7203 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7204 if (!pcbddc->current_level) { 7205 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7206 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7207 } else { 7208 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7209 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7210 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7211 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7212 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7213 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7214 } 7215 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7216 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7217 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7218 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7219 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7220 /* allow user customization */ 7221 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7222 } 7223 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7224 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7225 if (nisdofs) { 7226 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7227 for (i=0;i<nisdofs;i++) { 7228 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7229 } 7230 } 7231 if (nisneu) { 7232 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7233 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7234 } 7235 if (nisvert) { 7236 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7237 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7238 } 7239 7240 /* get some info after set from options */ 7241 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7242 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7243 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7244 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7245 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7246 isbddc = PETSC_FALSE; 7247 } 7248 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7249 if (isredundant) { 7250 KSP inner_ksp; 7251 PC inner_pc; 7252 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7253 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7254 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7255 } 7256 7257 /* parameters which miss an API */ 7258 if (isbddc) { 7259 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7260 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7261 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7262 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7263 if (pcbddc_coarse->benign_saddle_point) { 7264 Mat coarsedivudotp_is; 7265 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7266 IS row,col; 7267 const PetscInt *gidxs; 7268 PetscInt n,st,M,N; 7269 7270 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7271 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7272 st = st-n; 7273 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7274 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7275 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7276 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7277 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7278 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7279 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7280 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7281 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7282 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7283 ierr = ISDestroy(&row);CHKERRQ(ierr); 7284 ierr = ISDestroy(&col);CHKERRQ(ierr); 7285 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7286 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7287 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7288 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7289 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7290 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7291 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7292 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7293 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7294 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7295 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7296 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7297 } 7298 } 7299 7300 /* propagate symmetry info of coarse matrix */ 7301 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7302 if (pc->pmat->symmetric_set) { 7303 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7304 } 7305 if (pc->pmat->hermitian_set) { 7306 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7307 } 7308 if (pc->pmat->spd_set) { 7309 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7310 } 7311 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7312 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7313 } 7314 /* set operators */ 7315 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7316 if (pcbddc->dbg_flag) { 7317 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7318 } 7319 } 7320 ierr = PetscFree(isarray);CHKERRQ(ierr); 7321 #if 0 7322 { 7323 PetscViewer viewer; 7324 char filename[256]; 7325 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7326 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7327 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7328 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7329 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7330 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7331 } 7332 #endif 7333 7334 if (pcbddc->coarse_ksp) { 7335 Vec crhs,csol; 7336 7337 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7338 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7339 if (!csol) { 7340 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7341 } 7342 if (!crhs) { 7343 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7344 } 7345 } 7346 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7347 7348 /* compute null space for coarse solver if the benign trick has been requested */ 7349 if (pcbddc->benign_null) { 7350 7351 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7352 for (i=0;i<pcbddc->benign_n;i++) { 7353 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7354 } 7355 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7356 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7357 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7358 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7359 if (coarse_mat) { 7360 Vec nullv; 7361 PetscScalar *array,*array2; 7362 PetscInt nl; 7363 7364 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7365 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7366 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7367 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7368 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7369 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7370 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7371 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7372 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7373 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7374 } 7375 } 7376 7377 if (pcbddc->coarse_ksp) { 7378 PetscBool ispreonly; 7379 7380 if (CoarseNullSpace) { 7381 PetscBool isnull; 7382 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7383 if (isnull) { 7384 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7385 } 7386 /* TODO: add local nullspaces (if any) */ 7387 } 7388 /* setup coarse ksp */ 7389 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7390 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7391 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7392 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7393 KSP check_ksp; 7394 KSPType check_ksp_type; 7395 PC check_pc; 7396 Vec check_vec,coarse_vec; 7397 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7398 PetscInt its; 7399 PetscBool compute_eigs; 7400 PetscReal *eigs_r,*eigs_c; 7401 PetscInt neigs; 7402 const char *prefix; 7403 7404 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7405 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7406 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7407 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7408 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7409 /* prevent from setup unneeded object */ 7410 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7411 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7412 if (ispreonly) { 7413 check_ksp_type = KSPPREONLY; 7414 compute_eigs = PETSC_FALSE; 7415 } else { 7416 check_ksp_type = KSPGMRES; 7417 compute_eigs = PETSC_TRUE; 7418 } 7419 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7420 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7421 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7422 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7423 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7424 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7425 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7426 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7427 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7428 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7429 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7430 /* create random vec */ 7431 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7432 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7433 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7434 /* solve coarse problem */ 7435 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7436 /* set eigenvalue estimation if preonly has not been requested */ 7437 if (compute_eigs) { 7438 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7439 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7440 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7441 if (neigs) { 7442 lambda_max = eigs_r[neigs-1]; 7443 lambda_min = eigs_r[0]; 7444 if (pcbddc->use_coarse_estimates) { 7445 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7446 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7447 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7448 } 7449 } 7450 } 7451 } 7452 7453 /* check coarse problem residual error */ 7454 if (pcbddc->dbg_flag) { 7455 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7456 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7457 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7458 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7459 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7460 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7461 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7462 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7463 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7464 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7465 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7466 if (CoarseNullSpace) { 7467 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7468 } 7469 if (compute_eigs) { 7470 PetscReal lambda_max_s,lambda_min_s; 7471 KSPConvergedReason reason; 7472 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7473 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7474 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7475 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7476 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7477 for (i=0;i<neigs;i++) { 7478 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7479 } 7480 } 7481 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7482 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7483 } 7484 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7485 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7486 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7487 if (compute_eigs) { 7488 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7489 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7490 } 7491 } 7492 } 7493 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7494 /* print additional info */ 7495 if (pcbddc->dbg_flag) { 7496 /* waits until all processes reaches this point */ 7497 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7498 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7499 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7500 } 7501 7502 /* free memory */ 7503 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7504 PetscFunctionReturn(0); 7505 } 7506 7507 #undef __FUNCT__ 7508 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7509 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7510 { 7511 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7512 PC_IS* pcis = (PC_IS*)pc->data; 7513 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7514 IS subset,subset_mult,subset_n; 7515 PetscInt local_size,coarse_size=0; 7516 PetscInt *local_primal_indices=NULL; 7517 const PetscInt *t_local_primal_indices; 7518 PetscErrorCode ierr; 7519 7520 PetscFunctionBegin; 7521 /* Compute global number of coarse dofs */ 7522 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7523 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7524 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7525 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7526 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7527 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7528 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7529 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7530 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7531 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7532 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7533 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7534 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7535 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7536 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7537 7538 /* check numbering */ 7539 if (pcbddc->dbg_flag) { 7540 PetscScalar coarsesum,*array,*array2; 7541 PetscInt i; 7542 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7543 7544 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7545 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7546 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7547 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7548 /* counter */ 7549 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7550 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7551 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7552 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7553 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7554 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7555 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7556 for (i=0;i<pcbddc->local_primal_size;i++) { 7557 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7558 } 7559 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7560 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7561 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7562 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7563 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7564 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7565 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7566 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7567 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7568 for (i=0;i<pcis->n;i++) { 7569 if (array[i] != 0.0 && array[i] != array2[i]) { 7570 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7571 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7572 set_error = PETSC_TRUE; 7573 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7575 } 7576 } 7577 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7578 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7579 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7580 for (i=0;i<pcis->n;i++) { 7581 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7582 } 7583 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7584 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7585 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7586 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7587 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7589 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7590 PetscInt *gidxs; 7591 7592 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7593 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7595 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7596 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7597 for (i=0;i<pcbddc->local_primal_size;i++) { 7598 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7599 } 7600 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7601 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7602 } 7603 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7604 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7605 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7606 } 7607 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7608 /* get back data */ 7609 *coarse_size_n = coarse_size; 7610 *local_primal_indices_n = local_primal_indices; 7611 PetscFunctionReturn(0); 7612 } 7613 7614 #undef __FUNCT__ 7615 #define __FUNCT__ "PCBDDCGlobalToLocal" 7616 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7617 { 7618 IS localis_t; 7619 PetscInt i,lsize,*idxs,n; 7620 PetscScalar *vals; 7621 PetscErrorCode ierr; 7622 7623 PetscFunctionBegin; 7624 /* get indices in local ordering exploiting local to global map */ 7625 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7626 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7627 for (i=0;i<lsize;i++) vals[i] = 1.0; 7628 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7629 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7630 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7631 if (idxs) { /* multilevel guard */ 7632 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7633 } 7634 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7635 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7636 ierr = PetscFree(vals);CHKERRQ(ierr); 7637 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7638 /* now compute set in local ordering */ 7639 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7640 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7641 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7642 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7643 for (i=0,lsize=0;i<n;i++) { 7644 if (PetscRealPart(vals[i]) > 0.5) { 7645 lsize++; 7646 } 7647 } 7648 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7649 for (i=0,lsize=0;i<n;i++) { 7650 if (PetscRealPart(vals[i]) > 0.5) { 7651 idxs[lsize++] = i; 7652 } 7653 } 7654 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7655 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7656 *localis = localis_t; 7657 PetscFunctionReturn(0); 7658 } 7659 7660 #undef __FUNCT__ 7661 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7662 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7663 { 7664 PC_IS *pcis=(PC_IS*)pc->data; 7665 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7666 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7667 Mat S_j; 7668 PetscInt *used_xadj,*used_adjncy; 7669 PetscBool free_used_adj; 7670 PetscErrorCode ierr; 7671 7672 PetscFunctionBegin; 7673 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7674 free_used_adj = PETSC_FALSE; 7675 if (pcbddc->sub_schurs_layers == -1) { 7676 used_xadj = NULL; 7677 used_adjncy = NULL; 7678 } else { 7679 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7680 used_xadj = pcbddc->mat_graph->xadj; 7681 used_adjncy = pcbddc->mat_graph->adjncy; 7682 } else if (pcbddc->computed_rowadj) { 7683 used_xadj = pcbddc->mat_graph->xadj; 7684 used_adjncy = pcbddc->mat_graph->adjncy; 7685 } else { 7686 PetscBool flg_row=PETSC_FALSE; 7687 const PetscInt *xadj,*adjncy; 7688 PetscInt nvtxs; 7689 7690 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7691 if (flg_row) { 7692 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7693 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7694 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7695 free_used_adj = PETSC_TRUE; 7696 } else { 7697 pcbddc->sub_schurs_layers = -1; 7698 used_xadj = NULL; 7699 used_adjncy = NULL; 7700 } 7701 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7702 } 7703 } 7704 7705 /* setup sub_schurs data */ 7706 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7707 if (!sub_schurs->schur_explicit) { 7708 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7709 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7710 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7711 } else { 7712 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7713 PetscBool isseqaij,need_change = PETSC_FALSE; 7714 PetscInt benign_n; 7715 Mat change = NULL; 7716 Vec scaling = NULL; 7717 IS change_primal = NULL; 7718 7719 if (!pcbddc->use_vertices && reuse_solvers) { 7720 PetscInt n_vertices; 7721 7722 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7723 reuse_solvers = (PetscBool)!n_vertices; 7724 } 7725 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7726 if (!isseqaij) { 7727 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7728 if (matis->A == pcbddc->local_mat) { 7729 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7730 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7731 } else { 7732 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7733 } 7734 } 7735 if (!pcbddc->benign_change_explicit) { 7736 benign_n = pcbddc->benign_n; 7737 } else { 7738 benign_n = 0; 7739 } 7740 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7741 We need a global reduction to avoid possible deadlocks. 7742 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7743 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7744 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7745 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7746 need_change = (PetscBool)(!need_change); 7747 } 7748 /* If the user defines additional constraints, we import them here. 7749 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7750 if (need_change) { 7751 PC_IS *pcisf; 7752 PC_BDDC *pcbddcf; 7753 PC pcf; 7754 7755 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7756 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7757 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7758 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7759 /* hacks */ 7760 pcisf = (PC_IS*)pcf->data; 7761 pcisf->is_B_local = pcis->is_B_local; 7762 pcisf->vec1_N = pcis->vec1_N; 7763 pcisf->BtoNmap = pcis->BtoNmap; 7764 pcisf->n = pcis->n; 7765 pcisf->n_B = pcis->n_B; 7766 pcbddcf = (PC_BDDC*)pcf->data; 7767 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7768 pcbddcf->mat_graph = pcbddc->mat_graph; 7769 pcbddcf->use_faces = PETSC_TRUE; 7770 pcbddcf->use_change_of_basis = PETSC_TRUE; 7771 pcbddcf->use_change_on_faces = PETSC_TRUE; 7772 pcbddcf->use_qr_single = PETSC_TRUE; 7773 pcbddcf->fake_change = PETSC_TRUE; 7774 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7775 /* store information on primal vertices and change of basis (in local numbering) */ 7776 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7777 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7778 change = pcbddcf->ConstraintMatrix; 7779 pcbddcf->ConstraintMatrix = NULL; 7780 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7781 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7782 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7783 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7784 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7785 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7786 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7787 pcf->ops->destroy = NULL; 7788 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7789 } 7790 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7791 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7792 ierr = MatDestroy(&change);CHKERRQ(ierr); 7793 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7794 } 7795 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7796 7797 /* free adjacency */ 7798 if (free_used_adj) { 7799 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7800 } 7801 PetscFunctionReturn(0); 7802 } 7803 7804 #undef __FUNCT__ 7805 #define __FUNCT__ "PCBDDCInitSubSchurs" 7806 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7807 { 7808 PC_IS *pcis=(PC_IS*)pc->data; 7809 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7810 PCBDDCGraph graph; 7811 PetscErrorCode ierr; 7812 7813 PetscFunctionBegin; 7814 /* attach interface graph for determining subsets */ 7815 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7816 IS verticesIS,verticescomm; 7817 PetscInt vsize,*idxs; 7818 7819 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7820 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7821 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7822 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7823 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7824 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7825 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7826 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7827 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7828 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7829 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7830 } else { 7831 graph = pcbddc->mat_graph; 7832 } 7833 /* print some info */ 7834 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 7835 IS vertices; 7836 PetscInt nv,nedges,nfaces; 7837 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7838 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7839 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7840 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7841 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7845 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7846 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7847 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7848 } 7849 7850 /* sub_schurs init */ 7851 if (!pcbddc->sub_schurs) { 7852 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7853 } 7854 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 7855 7856 /* free graph struct */ 7857 if (pcbddc->sub_schurs_rebuild) { 7858 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7859 } 7860 PetscFunctionReturn(0); 7861 } 7862 7863 #undef __FUNCT__ 7864 #define __FUNCT__ "PCBDDCCheckOperator" 7865 PetscErrorCode PCBDDCCheckOperator(PC pc) 7866 { 7867 PC_IS *pcis=(PC_IS*)pc->data; 7868 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7869 PetscErrorCode ierr; 7870 7871 PetscFunctionBegin; 7872 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7873 IS zerodiag = NULL; 7874 Mat S_j,B0_B=NULL; 7875 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7876 PetscScalar *p0_check,*array,*array2; 7877 PetscReal norm; 7878 PetscInt i; 7879 7880 /* B0 and B0_B */ 7881 if (zerodiag) { 7882 IS dummy; 7883 7884 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7885 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7886 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7887 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7888 } 7889 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7890 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7891 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7892 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7893 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7894 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7895 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7896 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7897 /* S_j */ 7898 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7899 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7900 7901 /* mimic vector in \widetilde{W}_\Gamma */ 7902 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7903 /* continuous in primal space */ 7904 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7905 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7906 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7907 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7908 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7909 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7910 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7911 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7912 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7913 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7914 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7915 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7916 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7917 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7918 7919 /* assemble rhs for coarse problem */ 7920 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7921 /* local with Schur */ 7922 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7923 if (zerodiag) { 7924 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7925 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7926 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7927 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7928 } 7929 /* sum on primal nodes the local contributions */ 7930 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7931 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7932 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7933 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7934 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7935 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7936 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7937 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7938 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7939 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7940 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7941 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7942 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7943 /* scale primal nodes (BDDC sums contibutions) */ 7944 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7945 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7946 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7947 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7948 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7949 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7950 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7951 /* global: \widetilde{B0}_B w_\Gamma */ 7952 if (zerodiag) { 7953 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7954 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7955 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7956 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7957 } 7958 /* BDDC */ 7959 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7960 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7961 7962 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7963 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7964 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7965 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7966 for (i=0;i<pcbddc->benign_n;i++) { 7967 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7968 } 7969 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7970 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7971 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7972 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7973 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7974 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7975 } 7976 PetscFunctionReturn(0); 7977 } 7978