1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 if (!esize) PetscFunctionReturn(0); 84 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 85 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 86 87 /* gradients */ 88 ptr = work + 5*esize; 89 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 90 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 91 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 92 ierr = MatDestroy(&GE);CHKERRQ(ierr); 93 94 /* constants */ 95 ptr += rsize*csize; 96 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 97 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 99 ierr = MatDestroy(&GE);CHKERRQ(ierr); 100 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 101 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PCBDDCNedelecSupport" 107 PetscErrorCode PCBDDCNedelecSupport(PC pc) 108 { 109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 110 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 111 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 112 MatNullSpace nnsp; 113 Vec tvec,*quads; 114 PetscSF sfv; 115 ISLocalToGlobalMapping el2g,vl2g,fl2g; 116 MPI_Comm comm; 117 IS lned,primals,allprimals,nedfieldlocal; 118 IS *eedges,*extrows,*extcols,*alleedges; 119 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee,bter; 120 PetscScalar *vals,*work; 121 PetscReal *rwork; 122 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 123 PetscInt ne,nv,Lv,order,n,field,rst; 124 PetscInt n_neigh,*neigh,*n_shared,**shared; 125 PetscInt i,j,extmem,cum,maxsize,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscInt *corners,*cedges; 129 #if defined(PETSC_USE_DEBUG) 130 PetscInt *emarks; 131 #endif 132 PetscBool print,eerr,done,lrc[2],conforming; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 /* test variable order code and print debug info TODO: to be removed */ 137 print = PETSC_FALSE; 138 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 139 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 140 141 /* Return to caller if there are no edges in the decomposition */ 142 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 143 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 144 ierr = ISLocalToGlobalMappingGetSize(el2g,&n);CHKERRQ(ierr); 145 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 146 lrc[0] = PETSC_FALSE; 147 for (i=0;i<n;i++) { 148 if (PetscRealPart(vals[i]) > 2.) { 149 lrc[0] = PETSC_TRUE; 150 break; 151 } 152 } 153 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 154 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 155 if (!lrc[1]) PetscFunctionReturn(0); 156 157 /* Get discrete gradient 158 If it is defined for a subset of dofs, assumes G is given in global ordering for all the dofs */ 159 G = pcbddc->discretegradient; 160 order = pcbddc->nedorder; 161 conforming = pcbddc->conforming; 162 field = pcbddc->nedfield; 163 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 164 if (pcbddc->n_ISForDofsLocal && field > -1) { 165 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 166 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 167 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 168 } else { 169 PetscBool testnedfield = PETSC_FALSE; 170 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 171 if (!testnedfield) { 172 ne = n; 173 nedfieldlocal = NULL; 174 } else { 175 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 176 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 177 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 178 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 179 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 180 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 181 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 182 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 183 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 184 for (i=0,cum=0;i<n;i++) { 185 if (matis->sf_leafdata[i] > 1) { 186 matis->sf_leafdata[cum++] = i; 187 } 188 } 189 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 190 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 191 } 192 } 193 194 if (nedfieldlocal) { /* merge with previous code when testing is done */ 195 IS is; 196 197 /* need to map from the local Nedelec field to local numbering */ 198 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 199 /* need to map from the local Nedelec field to global numbering */ 200 ierr = ISLocalToGlobalMappingApplyIS(el2g,nedfieldlocal,&is);CHKERRQ(ierr); 201 ierr = ISLocalToGlobalMappingCreateIS(is,&el2g);CHKERRQ(ierr); 202 ierr = ISDestroy(&is);CHKERRQ(ierr); 203 } else { 204 ierr = PetscObjectReference((PetscObject)el2g);CHKERRQ(ierr); 205 fl2g = NULL; 206 } 207 208 /* Sanity checks */ 209 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 210 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 211 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 212 213 /* Drop connections for interior edges (this modifies G) */ 214 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 215 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 216 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 217 if (nedfieldlocal) { 218 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 219 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 220 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 221 } else { 222 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 223 } 224 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 225 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 226 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 227 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 228 if (matis->sf_rootdata[i] < 2) { 229 matis->sf_rootdata[cum++] = i + rst; 230 } 231 } 232 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 233 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 234 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 235 236 /* Extract subdomain relevant rows of G */ 237 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 238 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 239 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 240 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 241 ierr = ISDestroy(&lned);CHKERRQ(ierr); 242 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 243 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 244 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 245 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 246 if (print) { 247 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 248 ierr = MatView(lG,NULL);CHKERRQ(ierr); 249 } 250 251 /* SF for nodal communications */ 252 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 253 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 254 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 256 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 257 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 258 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 259 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 260 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 261 262 /* Destroy temporary G created in MATIS format */ 263 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 264 265 /* Save lG */ 266 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 267 268 /* Analyze the edge-nodes connections (duplicate lG) */ 269 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 270 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 271 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 272 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 273 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 274 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 275 /* need to import the boundary specification to ensure the 276 proper detection of coarse edges' endpoints */ 277 if (pcbddc->DirichletBoundariesLocal) { 278 IS is; 279 280 if (fl2g) { 281 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 282 } else { 283 is = pcbddc->DirichletBoundariesLocal; 284 } 285 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 286 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 287 for (i=0;i<cum;i++) { 288 if (idxs[i] >= 0) { 289 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 290 } 291 } 292 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 293 if (fl2g) { 294 ierr = ISDestroy(&is);CHKERRQ(ierr); 295 } 296 } 297 if (pcbddc->NeumannBoundariesLocal) { 298 IS is; 299 300 if (fl2g) { 301 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 302 } else { 303 is = pcbddc->NeumannBoundariesLocal; 304 } 305 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 306 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 307 for (i=0;i<cum;i++) { 308 if (idxs[i] >= 0) { 309 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 310 } 311 } 312 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 313 if (fl2g) { 314 ierr = ISDestroy(&is);CHKERRQ(ierr); 315 } 316 } 317 318 /* need to remove coarse faces' dofs to ensure the 319 proper detection of coarse edges' endpoints */ 320 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 321 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 322 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 323 for (i=1;i<n_neigh;i++) 324 for (j=0;j<n_shared[i];j++) 325 marks[shared[i][j]]++; 326 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) { 328 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 329 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 330 } 331 } 332 333 if (!conforming) { 334 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 335 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 336 } 337 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 338 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 339 cum = 0; 340 for (i=0;i<ne;i++) { 341 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 342 if (!PetscBTLookup(btee,i)) { 343 marks[cum++] = i; 344 continue; 345 } 346 /* set badly connected edge dofs as primal */ 347 if (!conforming) { 348 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 349 marks[cum++] = i; 350 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 351 for (j=ii[i];j<ii[i+1];j++) { 352 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 353 } 354 } else { 355 /* every edge dofs should be connected trough a certain number of nodal dofs 356 to other edge dofs belonging to coarse edges 357 - at most 2 endpoints 358 - order-1 interior nodal dofs 359 - no undefined nodal dofs (nconn < order) 360 */ 361 PetscInt ends = 0,ints = 0, undef = 0; 362 for (j=ii[i];j<ii[i+1];j++) { 363 PetscInt v = jj[j],k; 364 PetscInt nconn = iit[v+1]-iit[v]; 365 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 366 if (nconn > order) ends++; 367 else if (nconn == order) ints++; 368 else undef++; 369 } 370 if (undef || ends > 2 || ints != order -1) { 371 marks[cum++] = i; 372 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 373 for (j=ii[i];j<ii[i+1];j++) { 374 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 375 } 376 } 377 } 378 } 379 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 380 if (!order && ii[i+1] != ii[i]) { 381 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 382 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 383 } 384 } 385 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 386 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 387 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 388 if (!conforming) { 389 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 390 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 391 } 392 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 393 /* identify splitpoints and corner candidates */ 394 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 395 if (print) { 396 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 397 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 398 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 399 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 400 } 401 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 402 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 403 for (i=0;i<nv;i++) { 404 PetscInt ord = order, test = ii[i+1]-ii[i]; 405 if (!order) { /* variable order */ 406 PetscReal vorder = 0.; 407 408 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 409 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 410 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 411 ord = 1; 412 } 413 #if defined(PETSC_USE_DEBUG) 414 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 415 #endif 416 if (test >= 3*ord) { /* splitpoints */ 417 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 418 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 419 } else if (test == ord) { 420 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 421 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 422 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 423 } else { 424 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 425 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 426 } 427 } 428 } 429 430 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 431 if (order != 1) { 432 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 433 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 434 for (i=0;i<nv;i++) { 435 if (PetscBTLookup(btvcand,i)) { 436 PetscBool found = PETSC_FALSE; 437 for (j=ii[i];j<ii[i+1] && !found;j++) { 438 PetscInt k,e = jj[j]; 439 if (PetscBTLookup(bte,e)) continue; 440 for (k=iit[e];k<iit[e+1];k++) { 441 PetscInt v = jjt[k]; 442 if (v != i && PetscBTLookup(btvcand,v)) { 443 found = PETSC_TRUE; 444 break; 445 } 446 } 447 } 448 if (!found) { 449 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 450 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 451 } else { 452 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 453 } 454 } 455 } 456 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 457 } 458 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 459 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 460 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 461 462 /* Get the local G^T explicitly */ 463 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 464 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 465 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 466 467 /* Mark interior nodal dofs */ 468 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 469 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 470 for (i=1;i<n_neigh;i++) { 471 for (j=0;j<n_shared[i];j++) { 472 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 473 } 474 } 475 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 476 477 /* communicate corners and splitpoints */ 478 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 479 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 480 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 481 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 482 483 if (print) { 484 IS tbz; 485 486 cum = 0; 487 for (i=0;i<nv;i++) 488 if (sfvleaves[i]) 489 vmarks[cum++] = i; 490 491 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 492 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 493 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 494 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 495 } 496 497 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 498 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 499 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 500 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 501 502 /* Zero rows of lGt corresponding to identified corners 503 and interior nodal dofs */ 504 cum = 0; 505 for (i=0;i<nv;i++) { 506 if (sfvleaves[i]) { 507 vmarks[cum++] = i; 508 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 509 } 510 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 511 } 512 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 513 if (print) { 514 IS tbz; 515 516 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 517 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 518 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 519 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 520 } 521 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 522 ierr = PetscFree(vmarks);CHKERRQ(ierr); 523 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 524 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 525 526 /* Recompute G */ 527 ierr = MatDestroy(&lG);CHKERRQ(ierr); 528 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 529 if (print) { 530 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 531 ierr = MatView(lG,NULL);CHKERRQ(ierr); 532 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 533 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 534 } 535 536 /* Get primal dofs (if any) */ 537 cum = 0; 538 for (i=0;i<ne;i++) { 539 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 540 } 541 if (fl2g) { 542 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 543 } 544 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 545 if (print) { 546 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 547 ierr = ISView(primals,NULL);CHKERRQ(ierr); 548 } 549 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 550 /* TODO: what if the user passed in some of them ? */ 551 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 552 ierr = ISDestroy(&primals);CHKERRQ(ierr); 553 554 /* Compute edge connectivity */ 555 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 556 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 557 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 558 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 559 if (fl2g) { 560 PetscBT btf; 561 PetscInt *iia,*jja,*iiu,*jju; 562 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 563 564 /* create CSR for all local dofs */ 565 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 566 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 567 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 568 iiu = pcbddc->mat_graph->xadj; 569 jju = pcbddc->mat_graph->adjncy; 570 } else if (pcbddc->use_local_adj) { 571 rest = PETSC_TRUE; 572 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 573 } else { 574 free = PETSC_TRUE; 575 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 576 iiu[0] = 0; 577 for (i=0;i<n;i++) { 578 iiu[i+1] = i+1; 579 jju[i] = -1; 580 } 581 } 582 583 /* import sizes of CSR */ 584 iia[0] = 0; 585 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 586 587 /* overwrite entries corresponding to the Nedelec field */ 588 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 589 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 590 for (i=0;i<ne;i++) { 591 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 592 iia[idxs[i]+1] = ii[i+1]-ii[i]; 593 } 594 595 /* iia in CSR */ 596 for (i=0;i<n;i++) iia[i+1] += iia[i]; 597 598 /* jja in CSR */ 599 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 600 for (i=0;i<n;i++) 601 if (!PetscBTLookup(btf,i)) 602 for (j=0;j<iiu[i+1]-iiu[i];j++) 603 jja[iia[i]+j] = jju[iiu[i]+j]; 604 605 /* map edge dofs connectivity */ 606 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 607 for (i=0;i<ne;i++) { 608 PetscInt e = idxs[i]; 609 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 610 } 611 612 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 613 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 614 if (rest) { 615 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 616 } 617 if (free) { 618 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 619 } 620 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 621 } else { 622 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 623 } 624 } 625 626 /* Analyze interface for edge dofs */ 627 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 628 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 629 630 /* Get coarse edges in the edge space */ 631 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 632 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 633 634 if (fl2g) { 635 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 636 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 637 for (i=0;i<nee;i++) { 638 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 639 } 640 } else { 641 eedges = alleedges; 642 primals = allprimals; 643 } 644 645 /* Mark fine edge dofs with their coarse edge id */ 646 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 647 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 648 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 649 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 650 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 651 if (print) { 652 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 653 ierr = ISView(primals,NULL);CHKERRQ(ierr); 654 } 655 656 maxsize = 0; 657 for (i=0;i<nee;i++) { 658 PetscInt size,mark = i+1; 659 660 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 661 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 662 for (j=0;j<size;j++) marks[idxs[j]] = mark; 663 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 664 maxsize = PetscMax(maxsize,size); 665 } 666 667 /* Find coarse edge endpoints */ 668 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 669 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 670 for (i=0;i<nee;i++) { 671 PetscInt mark = i+1,size; 672 673 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 674 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 675 if (print) { 676 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 677 ISView(eedges[i],NULL); 678 } 679 for (j=0;j<size;j++) { 680 PetscInt k, ee = idxs[j]; 681 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 682 for (k=ii[ee];k<ii[ee+1];k++) { 683 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 684 if (PetscBTLookup(btv,jj[k])) { 685 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 686 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 687 PetscInt k2; 688 PetscBool corner = PETSC_FALSE; 689 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 690 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 691 /* it's a corner if either is connected with an edge dof belonging to a different cc or 692 if the edge dof lie on the natural part of the boundary */ 693 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 694 corner = PETSC_TRUE; 695 break; 696 } 697 } 698 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 699 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 700 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 701 } else { 702 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 703 } 704 } 705 } 706 } 707 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 708 } 709 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 710 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 711 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 712 713 /* Reset marked primal dofs */ 714 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 715 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 716 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 717 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 718 719 /* Now use the initial lG */ 720 ierr = MatDestroy(&lG);CHKERRQ(ierr); 721 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 722 lG = lGinit; 723 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 724 725 /* Compute extended cols indices */ 726 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 727 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 728 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 729 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 730 i *= maxsize; 731 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 732 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 733 eerr = PETSC_FALSE; 734 for (i=0;i<nee;i++) { 735 PetscInt size,found = 0; 736 737 cum = 0; 738 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 739 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 740 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 741 for (j=0;j<size;j++) { 742 PetscInt k,ee = idxs[j]; 743 for (k=ii[ee];k<ii[ee+1];k++) { 744 PetscInt vv = jj[k]; 745 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 746 else if (!PetscBTLookupSet(btvc,vv)) found++; 747 } 748 } 749 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 750 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 751 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 752 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 753 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 754 /* it may happen that endpoints are not defined at this point 755 if it is the case, mark this edge for a second pass */ 756 if (cum != size -1 || found != 2) { 757 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 758 if (print) { 759 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 760 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 761 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 762 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 763 } 764 eerr = PETSC_TRUE; 765 } 766 } 767 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 768 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 769 if (done) { 770 PetscInt *newprimals; 771 772 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 773 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 774 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 775 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 776 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 777 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 778 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 779 for (i=0;i<nee;i++) { 780 PetscBool has_candidates = PETSC_FALSE; 781 if (PetscBTLookup(bter,i)) { 782 PetscInt size,mark = i+1; 783 784 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 785 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 786 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 787 for (j=0;j<size;j++) { 788 PetscInt k,ee = idxs[j]; 789 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 790 for (k=ii[ee];k<ii[ee+1];k++) { 791 /* set all candidates located on the edge as corners */ 792 if (PetscBTLookup(btvcand,jj[k])) { 793 PetscInt k2,vv = jj[k]; 794 has_candidates = PETSC_TRUE; 795 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 796 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 797 /* set all edge dofs connected to candidate as primals */ 798 for (k2=iit[vv];k2<iit[vv+1];k2++) { 799 if (marks[jjt[k2]] == mark) { 800 PetscInt k3,ee2 = jjt[k2]; 801 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 802 newprimals[cum++] = ee2; 803 /* finally set the new corners */ 804 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 805 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 806 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 807 } 808 } 809 } 810 } else { 811 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 812 } 813 } 814 } 815 if (!has_candidates) { /* circular edge */ 816 PetscInt k, ee = idxs[0],*tmarks; 817 818 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 819 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 820 for (k=ii[ee];k<ii[ee+1];k++) { 821 PetscInt k2; 822 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 823 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 824 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 825 } 826 for (j=0;j<size;j++) { 827 if (tmarks[idxs[j]] > 1) { 828 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 829 newprimals[cum++] = idxs[j]; 830 } 831 } 832 ierr = PetscFree(tmarks);CHKERRQ(ierr); 833 } 834 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 835 } 836 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 837 } 838 ierr = PetscFree(extcols);CHKERRQ(ierr); 839 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 840 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 841 if (fl2g) { 842 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 843 ierr = ISDestroy(&primals);CHKERRQ(ierr); 844 for (i=0;i<nee;i++) { 845 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 846 } 847 ierr = PetscFree(eedges);CHKERRQ(ierr); 848 } 849 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 850 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 851 ierr = PetscFree(newprimals);CHKERRQ(ierr); 852 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 853 ierr = ISDestroy(&primals);CHKERRQ(ierr); 854 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 855 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 856 if (fl2g) { 857 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 858 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 859 for (i=0;i<nee;i++) { 860 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 861 } 862 } else { 863 eedges = alleedges; 864 primals = allprimals; 865 } 866 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 867 868 /* Mark again */ 869 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 } 878 if (print) { 879 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 880 ierr = ISView(primals,NULL);CHKERRQ(ierr); 881 } 882 883 /* Recompute extended cols */ 884 eerr = PETSC_FALSE; 885 for (i=0;i<nee;i++) { 886 PetscInt size; 887 888 cum = 0; 889 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 890 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 891 for (j=0;j<size;j++) { 892 PetscInt k,ee = idxs[j]; 893 for (k=ii[ee];k<ii[ee+1];k++) { 894 if (!PetscBTLookup(btv,jj[k])) { 895 extrow[cum++] = jj[k]; 896 } 897 } 898 } 899 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 900 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 901 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 902 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 903 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 904 if (cum != size -1) { 905 if (print) { 906 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 907 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 908 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 909 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 910 } 911 eerr = PETSC_TRUE; 912 } 913 } 914 } 915 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 916 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 917 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 918 /* an error should not occur at this point */ 919 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 920 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 921 922 /* Check the number of endpoints */ 923 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 924 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 925 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 926 for (i=0;i<nee;i++) { 927 PetscInt size, found = 0, gc[2]; 928 929 /* init with defaults */ 930 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 931 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 932 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 933 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 934 for (j=0;j<size;j++) { 935 PetscInt k,ee = idxs[j]; 936 for (k=ii[ee];k<ii[ee+1];k++) { 937 PetscInt vv = jj[k]; 938 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 939 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 940 corners[i*2+found++] = vv; 941 } 942 } 943 } 944 if (found != 2) { 945 PetscInt e; 946 if (fl2g) { 947 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 948 } else { 949 e = idxs[0]; 950 } 951 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 952 } 953 /* WARNING : this depends on how pcbddc->primal_indices_local_idxs is filled up in PCBDDConstraintsSetUp */ 954 cedges[i] = idxs[0]; 955 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 956 if (gc[0] > gc[1]) { 957 PetscInt swap = corners[2*i]; 958 corners[2*i] = corners[2*i+1]; 959 corners[2*i+1] = swap; 960 } 961 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 962 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 963 } 964 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 965 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 966 967 #if defined(PETSC_USE_DEBUG) 968 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 969 not interfere with neighbouring coarse edges */ 970 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 971 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 972 for (i=0;i<nv;i++) { 973 PetscInt emax = 0,eemax = 0; 974 975 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 976 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 977 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 978 for (j=1;j<nee+1;j++) { 979 if (emax < emarks[j]) { 980 emax = emarks[j]; 981 eemax = j; 982 } 983 } 984 /* not relevant for edges */ 985 if (!eemax) continue; 986 987 for (j=ii[i];j<ii[i+1];j++) { 988 if (marks[jj[j]] && marks[jj[j]] != eemax) { 989 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 990 } 991 } 992 } 993 ierr = PetscFree(emarks);CHKERRQ(ierr); 994 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 995 #endif 996 997 /* Compute extended rows indices for edge blocks of the change of basis */ 998 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 999 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1000 extmem *= maxsize; 1001 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1002 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1003 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1004 for (i=0;i<nv;i++) { 1005 PetscInt mark = 0,size,start; 1006 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1007 for (j=ii[i];j<ii[i+1];j++) 1008 if (marks[jj[j]] && !mark) 1009 mark = marks[jj[j]]; 1010 1011 /* not relevant */ 1012 if (!mark) continue; 1013 1014 /* import extended row */ 1015 mark--; 1016 start = mark*extmem+extrowcum[mark]; 1017 size = ii[i+1]-ii[i]; 1018 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1019 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1020 extrowcum[mark] += size; 1021 } 1022 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1023 cum = 0; 1024 for (i=0;i<nee;i++) { 1025 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1026 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1027 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1028 cum = PetscMax(cum,size); 1029 } 1030 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1031 ierr = PetscFree(marks);CHKERRQ(ierr); 1032 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1033 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1034 1035 /* Workspace for lapack inner calls and VecSetValues */ 1036 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1037 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1038 for (i=0;i<maxsize;i++) vals[i] = 1.; 1039 1040 /* Create vectors for quadrature rules */ 1041 /* TODO preserve other quadratures */ 1042 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 1043 for (i=0;i<nquads;i++) { 1044 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 1045 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 1046 } 1047 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 1048 1049 /* Create change of basis matrix (preallocation can be improved) */ 1050 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1051 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1052 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1053 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1054 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1055 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1056 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 1057 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1058 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1059 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1060 1061 /* Defaults to identity */ 1062 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1063 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1064 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1065 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1066 1067 for (i=0;i<nee;i++) { 1068 Mat Gins = NULL, GKins = NULL; 1069 1070 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 1071 if (Gins && GKins) { 1072 PetscScalar *data; 1073 const PetscInt *rows,*cols; 1074 PetscInt nrh,nch,nrc,ncc; 1075 1076 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1077 /* H1 */ 1078 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1079 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1080 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1081 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1082 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1083 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1084 /* complement */ 1085 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1086 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 1087 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 1088 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1089 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1090 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1091 /* Gins kernel quadratures */ 1092 for (j=0;j<ncc;j++) { 1093 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1094 } 1095 /* H1 average */ 1096 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1097 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1098 } 1099 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1100 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1101 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1102 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1103 } 1104 1105 /* Start assembling */ 1106 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1107 for (i=0;i<nquads;i++) { 1108 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1109 } 1110 1111 /* Free */ 1112 if (fl2g) { 1113 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1114 for (i=0;i<nee;i++) { 1115 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1116 } 1117 ierr = PetscFree(eedges);CHKERRQ(ierr); 1118 } 1119 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1120 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1121 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1122 ierr = PetscFree(extrow);CHKERRQ(ierr); 1123 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1124 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1125 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1126 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1127 ierr = PetscFree(vals);CHKERRQ(ierr); 1128 ierr = PetscFree(corners);CHKERRQ(ierr); 1129 ierr = PetscFree(cedges);CHKERRQ(ierr); 1130 ierr = PetscFree(extrows);CHKERRQ(ierr); 1131 ierr = PetscFree(extcols);CHKERRQ(ierr); 1132 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1133 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1134 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1135 1136 /* Complete assembling */ 1137 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1138 for (i=0;i<nquads;i++) { 1139 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1140 } 1141 for (i=0;i<nquads;i++) { 1142 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1143 } 1144 ierr = PetscFree(quads);CHKERRQ(ierr); 1145 1146 /* set change of basis */ 1147 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1148 ierr = MatDestroy(&T);CHKERRQ(ierr); 1149 1150 /* set quadratures */ 1151 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1152 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1153 1154 PetscFunctionReturn(0); 1155 } 1156 1157 /* the near-null space of BDDC carries information on quadrature weights, 1158 and these can be collinear -> so cheat with MatNullSpaceCreate 1159 and create a suitable set of basis vectors first */ 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1162 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1163 { 1164 PetscErrorCode ierr; 1165 PetscInt i; 1166 1167 PetscFunctionBegin; 1168 for (i=0;i<nvecs;i++) { 1169 PetscInt first,last; 1170 1171 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1172 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1173 if (i>=first && i < last) { 1174 PetscScalar *data; 1175 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1176 if (!has_const) { 1177 data[i-first] = 1.; 1178 } else { 1179 data[2*i-first] = 1./PetscSqrtReal(2.); 1180 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1181 } 1182 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1183 } 1184 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1185 } 1186 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1187 for (i=0;i<nvecs;i++) { /* reset vectors */ 1188 PetscInt first,last; 1189 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1190 if (i>=first && i < last) { 1191 PetscScalar *data; 1192 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1193 if (!has_const) { 1194 data[i-first] = 0.; 1195 } else { 1196 data[2*i-first] = 0.; 1197 data[2*i-first+1] = 0.; 1198 } 1199 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1200 } 1201 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1202 } 1203 PetscFunctionReturn(0); 1204 } 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1208 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1209 { 1210 Mat loc_divudotp; 1211 Vec p,v,vins,quad_vec,*quad_vecs; 1212 ISLocalToGlobalMapping map; 1213 IS *faces,*edges; 1214 PetscScalar *vals; 1215 const PetscScalar *array; 1216 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1217 PetscMPIInt rank; 1218 PetscErrorCode ierr; 1219 1220 PetscFunctionBegin; 1221 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1222 if (graph->twodim) { 1223 lmaxneighs = 2; 1224 } else { 1225 lmaxneighs = 1; 1226 for (i=0;i<ne;i++) { 1227 const PetscInt *idxs; 1228 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1229 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1230 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1231 } 1232 lmaxneighs++; /* graph count does not include self */ 1233 } 1234 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1235 maxsize = 0; 1236 for (i=0;i<ne;i++) { 1237 PetscInt nn; 1238 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1239 maxsize = PetscMax(maxsize,nn); 1240 } 1241 for (i=0;i<nf;i++) { 1242 PetscInt nn; 1243 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1244 maxsize = PetscMax(maxsize,nn); 1245 } 1246 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1247 /* create vectors to hold quadrature weights */ 1248 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1249 if (!transpose) { 1250 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1251 } else { 1252 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1253 } 1254 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1255 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1256 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1257 for (i=0;i<maxneighs;i++) { 1258 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1259 } 1260 1261 /* compute local quad vec */ 1262 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1263 if (!transpose) { 1264 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1265 } else { 1266 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1267 } 1268 ierr = VecSet(p,1.);CHKERRQ(ierr); 1269 if (!transpose) { 1270 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1271 } else { 1272 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1273 } 1274 if (vl2l) { 1275 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1276 } else { 1277 vins = v; 1278 } 1279 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1280 ierr = VecDestroy(&p);CHKERRQ(ierr); 1281 1282 /* insert in global quadrature vecs */ 1283 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1284 for (i=0;i<nf;i++) { 1285 const PetscInt *idxs; 1286 PetscInt idx,nn,j; 1287 1288 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1289 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1290 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1291 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1292 idx = -(idx+1); 1293 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1294 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1295 } 1296 for (i=0;i<ne;i++) { 1297 const PetscInt *idxs; 1298 PetscInt idx,nn,j; 1299 1300 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1301 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1302 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1303 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1304 idx = -(idx+1); 1305 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1306 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1307 } 1308 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1309 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1310 if (vl2l) { 1311 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1312 } 1313 ierr = VecDestroy(&v);CHKERRQ(ierr); 1314 ierr = PetscFree(vals);CHKERRQ(ierr); 1315 1316 /* assemble near null space */ 1317 for (i=0;i<maxneighs;i++) { 1318 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1319 } 1320 for (i=0;i<maxneighs;i++) { 1321 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1322 } 1323 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1330 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1331 { 1332 PetscErrorCode ierr; 1333 Vec local,global; 1334 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1335 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1336 1337 PetscFunctionBegin; 1338 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1339 /* need to convert from global to local topology information and remove references to information in global ordering */ 1340 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1341 if (pcbddc->user_provided_isfordofs) { 1342 if (pcbddc->n_ISForDofs) { 1343 PetscInt i; 1344 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1345 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1346 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1347 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1348 } 1349 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1350 pcbddc->n_ISForDofs = 0; 1351 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1352 } 1353 } else { 1354 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1355 PetscInt i, n = matis->A->rmap->n; 1356 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1357 if (i > 1) { 1358 pcbddc->n_ISForDofsLocal = i; 1359 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1360 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1361 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1362 } 1363 } 1364 } 1365 } 1366 1367 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1368 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1369 } 1370 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1371 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1372 } 1373 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1374 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1375 } 1376 ierr = VecDestroy(&global);CHKERRQ(ierr); 1377 ierr = VecDestroy(&local);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1383 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1384 { 1385 PC_IS *pcis = (PC_IS*)(pc->data); 1386 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1387 PetscErrorCode ierr; 1388 1389 PetscFunctionBegin; 1390 if (!pcbddc->benign_have_null) { 1391 PetscFunctionReturn(0); 1392 } 1393 if (pcbddc->ChangeOfBasisMatrix) { 1394 Vec swap; 1395 1396 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1397 swap = pcbddc->work_change; 1398 pcbddc->work_change = r; 1399 r = swap; 1400 } 1401 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1402 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1403 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1404 ierr = VecSet(z,0.);CHKERRQ(ierr); 1405 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1406 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1407 if (pcbddc->ChangeOfBasisMatrix) { 1408 pcbddc->work_change = r; 1409 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1410 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1411 } 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1417 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1418 { 1419 PCBDDCBenignMatMult_ctx ctx; 1420 PetscErrorCode ierr; 1421 PetscBool apply_right,apply_left,reset_x; 1422 1423 PetscFunctionBegin; 1424 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1425 if (transpose) { 1426 apply_right = ctx->apply_left; 1427 apply_left = ctx->apply_right; 1428 } else { 1429 apply_right = ctx->apply_right; 1430 apply_left = ctx->apply_left; 1431 } 1432 reset_x = PETSC_FALSE; 1433 if (apply_right) { 1434 const PetscScalar *ax; 1435 PetscInt nl,i; 1436 1437 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1438 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1439 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1440 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1441 for (i=0;i<ctx->benign_n;i++) { 1442 PetscScalar sum,val; 1443 const PetscInt *idxs; 1444 PetscInt nz,j; 1445 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1446 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1447 sum = 0.; 1448 if (ctx->apply_p0) { 1449 val = ctx->work[idxs[nz-1]]; 1450 for (j=0;j<nz-1;j++) { 1451 sum += ctx->work[idxs[j]]; 1452 ctx->work[idxs[j]] += val; 1453 } 1454 } else { 1455 for (j=0;j<nz-1;j++) { 1456 sum += ctx->work[idxs[j]]; 1457 } 1458 } 1459 ctx->work[idxs[nz-1]] -= sum; 1460 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1461 } 1462 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1463 reset_x = PETSC_TRUE; 1464 } 1465 if (transpose) { 1466 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1467 } else { 1468 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1469 } 1470 if (reset_x) { 1471 ierr = VecResetArray(x);CHKERRQ(ierr); 1472 } 1473 if (apply_left) { 1474 PetscScalar *ay; 1475 PetscInt i; 1476 1477 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1478 for (i=0;i<ctx->benign_n;i++) { 1479 PetscScalar sum,val; 1480 const PetscInt *idxs; 1481 PetscInt nz,j; 1482 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1483 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1484 val = -ay[idxs[nz-1]]; 1485 if (ctx->apply_p0) { 1486 sum = 0.; 1487 for (j=0;j<nz-1;j++) { 1488 sum += ay[idxs[j]]; 1489 ay[idxs[j]] += val; 1490 } 1491 ay[idxs[nz-1]] += sum; 1492 } else { 1493 for (j=0;j<nz-1;j++) { 1494 ay[idxs[j]] += val; 1495 } 1496 ay[idxs[nz-1]] = 0.; 1497 } 1498 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1499 } 1500 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1507 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1508 { 1509 PetscErrorCode ierr; 1510 1511 PetscFunctionBegin; 1512 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1518 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1519 { 1520 PetscErrorCode ierr; 1521 1522 PetscFunctionBegin; 1523 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "PCBDDCBenignShellMat" 1529 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1530 { 1531 PC_IS *pcis = (PC_IS*)pc->data; 1532 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1533 PCBDDCBenignMatMult_ctx ctx; 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 if (!restore) { 1538 Mat A_IB,A_BI; 1539 PetscScalar *work; 1540 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1541 1542 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1543 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1544 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1545 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1546 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1547 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1548 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1549 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1550 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1551 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1552 ctx->apply_left = PETSC_TRUE; 1553 ctx->apply_right = PETSC_FALSE; 1554 ctx->apply_p0 = PETSC_FALSE; 1555 ctx->benign_n = pcbddc->benign_n; 1556 if (reuse) { 1557 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1558 ctx->free = PETSC_FALSE; 1559 } else { /* TODO: could be optimized for successive solves */ 1560 ISLocalToGlobalMapping N_to_D; 1561 PetscInt i; 1562 1563 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1564 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1565 for (i=0;i<pcbddc->benign_n;i++) { 1566 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1567 } 1568 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1569 ctx->free = PETSC_TRUE; 1570 } 1571 ctx->A = pcis->A_IB; 1572 ctx->work = work; 1573 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1574 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1575 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1576 pcis->A_IB = A_IB; 1577 1578 /* A_BI as A_IB^T */ 1579 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1580 pcbddc->benign_original_mat = pcis->A_BI; 1581 pcis->A_BI = A_BI; 1582 } else { 1583 if (!pcbddc->benign_original_mat) { 1584 PetscFunctionReturn(0); 1585 } 1586 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1587 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1588 pcis->A_IB = ctx->A; 1589 ctx->A = NULL; 1590 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1591 pcis->A_BI = pcbddc->benign_original_mat; 1592 pcbddc->benign_original_mat = NULL; 1593 if (ctx->free) { 1594 PetscInt i; 1595 for (i=0;i<ctx->benign_n;i++) { 1596 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1597 } 1598 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1599 } 1600 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1601 ierr = PetscFree(ctx);CHKERRQ(ierr); 1602 } 1603 PetscFunctionReturn(0); 1604 } 1605 1606 /* used just in bddc debug mode */ 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "PCBDDCBenignProject" 1609 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1610 { 1611 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1612 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1613 Mat An; 1614 PetscErrorCode ierr; 1615 1616 PetscFunctionBegin; 1617 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1618 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1619 if (is1) { 1620 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1621 ierr = MatDestroy(&An);CHKERRQ(ierr); 1622 } else { 1623 *B = An; 1624 } 1625 PetscFunctionReturn(0); 1626 } 1627 1628 /* TODO: add reuse flag */ 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatSeqAIJCompress" 1631 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1632 { 1633 Mat Bt; 1634 PetscScalar *a,*bdata; 1635 const PetscInt *ii,*ij; 1636 PetscInt m,n,i,nnz,*bii,*bij; 1637 PetscBool flg_row; 1638 PetscErrorCode ierr; 1639 1640 PetscFunctionBegin; 1641 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1642 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1643 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1644 nnz = n; 1645 for (i=0;i<ii[n];i++) { 1646 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1647 } 1648 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1649 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1650 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1651 nnz = 0; 1652 bii[0] = 0; 1653 for (i=0;i<n;i++) { 1654 PetscInt j; 1655 for (j=ii[i];j<ii[i+1];j++) { 1656 PetscScalar entry = a[j]; 1657 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1658 bij[nnz] = ij[j]; 1659 bdata[nnz] = entry; 1660 nnz++; 1661 } 1662 } 1663 bii[i+1] = nnz; 1664 } 1665 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1666 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1667 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1668 { 1669 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1670 b->free_a = PETSC_TRUE; 1671 b->free_ij = PETSC_TRUE; 1672 } 1673 *B = Bt; 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatDetectDisconnectedComponents" 1679 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1680 { 1681 Mat B; 1682 IS is_dummy,*cc_n; 1683 ISLocalToGlobalMapping l2gmap_dummy; 1684 PCBDDCGraph graph; 1685 PetscInt i,n; 1686 PetscInt *xadj,*adjncy; 1687 PetscInt *xadj_filtered,*adjncy_filtered; 1688 PetscBool flg_row,isseqaij; 1689 PetscErrorCode ierr; 1690 1691 PetscFunctionBegin; 1692 if (!A->rmap->N || !A->cmap->N) { 1693 *ncc = 0; 1694 *cc = NULL; 1695 PetscFunctionReturn(0); 1696 } 1697 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1698 if (!isseqaij && filter) { 1699 PetscBool isseqdense; 1700 1701 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1702 if (!isseqdense) { 1703 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1704 } else { /* TODO: rectangular case and LDA */ 1705 PetscScalar *array; 1706 PetscReal chop=1.e-6; 1707 1708 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1709 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1710 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1711 for (i=0;i<n;i++) { 1712 PetscInt j; 1713 for (j=i+1;j<n;j++) { 1714 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1715 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1716 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1717 } 1718 } 1719 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1720 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1721 } 1722 } else { 1723 B = A; 1724 } 1725 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1726 1727 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1728 if (filter) { 1729 PetscScalar *data; 1730 PetscInt j,cum; 1731 1732 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1733 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1734 cum = 0; 1735 for (i=0;i<n;i++) { 1736 PetscInt t; 1737 1738 for (j=xadj[i];j<xadj[i+1];j++) { 1739 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1740 continue; 1741 } 1742 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1743 } 1744 t = xadj_filtered[i]; 1745 xadj_filtered[i] = cum; 1746 cum += t; 1747 } 1748 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1749 } else { 1750 xadj_filtered = NULL; 1751 adjncy_filtered = NULL; 1752 } 1753 1754 /* compute local connected components using PCBDDCGraph */ 1755 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1756 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1757 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1758 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1759 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1760 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1761 if (xadj_filtered) { 1762 graph->xadj = xadj_filtered; 1763 graph->adjncy = adjncy_filtered; 1764 } else { 1765 graph->xadj = xadj; 1766 graph->adjncy = adjncy; 1767 } 1768 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1769 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1770 /* partial clean up */ 1771 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1772 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1773 if (A != B) { 1774 ierr = MatDestroy(&B);CHKERRQ(ierr); 1775 } 1776 1777 /* get back data */ 1778 if (ncc) *ncc = graph->ncc; 1779 if (cc) { 1780 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1781 for (i=0;i<graph->ncc;i++) { 1782 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1783 } 1784 *cc = cc_n; 1785 } 1786 /* clean up graph */ 1787 graph->xadj = 0; 1788 graph->adjncy = 0; 1789 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "PCBDDCBenignCheck" 1795 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1796 { 1797 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1798 PC_IS* pcis = (PC_IS*)(pc->data); 1799 IS dirIS = NULL; 1800 PetscInt i; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1805 if (zerodiag) { 1806 Mat A; 1807 Vec vec3_N; 1808 PetscScalar *vals; 1809 const PetscInt *idxs; 1810 PetscInt nz,*count; 1811 1812 /* p0 */ 1813 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1814 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1815 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1816 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1817 for (i=0;i<nz;i++) vals[i] = 1.; 1818 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1819 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1820 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1821 /* v_I */ 1822 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1823 for (i=0;i<nz;i++) vals[i] = 0.; 1824 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1825 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1826 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1827 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1828 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1829 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1830 if (dirIS) { 1831 PetscInt n; 1832 1833 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1834 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1835 for (i=0;i<n;i++) vals[i] = 0.; 1836 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1837 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1838 } 1839 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1840 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1841 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1842 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1843 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1844 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1845 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1846 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1847 ierr = PetscFree(vals);CHKERRQ(ierr); 1848 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1849 1850 /* there should not be any pressure dofs lying on the interface */ 1851 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1852 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1853 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1854 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1855 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1856 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1857 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1858 ierr = PetscFree(count);CHKERRQ(ierr); 1859 } 1860 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1861 1862 /* check PCBDDCBenignGetOrSetP0 */ 1863 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1864 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1865 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1866 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1867 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1868 for (i=0;i<pcbddc->benign_n;i++) { 1869 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1870 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1871 } 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1877 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1878 { 1879 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1880 IS pressures,zerodiag,*zerodiag_subs; 1881 PetscInt nz,n; 1882 PetscInt *interior_dofs,n_interior_dofs; 1883 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1884 PetscErrorCode ierr; 1885 1886 PetscFunctionBegin; 1887 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1888 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1889 for (n=0;n<pcbddc->benign_n;n++) { 1890 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1891 } 1892 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1893 pcbddc->benign_n = 0; 1894 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1895 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1896 Checks if all the pressure dofs in each subdomain have a zero diagonal 1897 If not, a change of basis on pressures is not needed 1898 since the local Schur complements are already SPD 1899 */ 1900 has_null_pressures = PETSC_TRUE; 1901 have_null = PETSC_TRUE; 1902 if (pcbddc->n_ISForDofsLocal) { 1903 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1904 1905 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1906 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1907 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1908 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1909 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1910 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1911 if (!sorted) { 1912 ierr = ISSort(pressures);CHKERRQ(ierr); 1913 } 1914 } else { 1915 pressures = NULL; 1916 } 1917 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1918 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1919 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1920 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1921 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1922 if (!sorted) { 1923 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1924 } 1925 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1926 if (!nz) { 1927 if (n) have_null = PETSC_FALSE; 1928 has_null_pressures = PETSC_FALSE; 1929 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1930 } 1931 recompute_zerodiag = PETSC_FALSE; 1932 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1933 zerodiag_subs = NULL; 1934 pcbddc->benign_n = 0; 1935 n_interior_dofs = 0; 1936 interior_dofs = NULL; 1937 if (pcbddc->current_level) { /* need to compute interior nodes */ 1938 PetscInt n,i,j; 1939 PetscInt n_neigh,*neigh,*n_shared,**shared; 1940 PetscInt *iwork; 1941 1942 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1943 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1944 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1945 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1946 for (i=1;i<n_neigh;i++) 1947 for (j=0;j<n_shared[i];j++) 1948 iwork[shared[i][j]] += 1; 1949 for (i=0;i<n;i++) 1950 if (!iwork[i]) 1951 interior_dofs[n_interior_dofs++] = i; 1952 ierr = PetscFree(iwork);CHKERRQ(ierr); 1953 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1954 } 1955 if (has_null_pressures) { 1956 IS *subs; 1957 PetscInt nsubs,i,j,nl; 1958 const PetscInt *idxs; 1959 PetscScalar *array; 1960 Vec *work; 1961 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1962 1963 subs = pcbddc->local_subs; 1964 nsubs = pcbddc->n_local_subs; 1965 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1966 if (pcbddc->current_level) { 1967 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1968 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1969 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1970 /* work[0] = 1_p */ 1971 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1972 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1973 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1974 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1975 /* work[0] = 1_v */ 1976 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1977 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1978 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1979 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1980 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1981 } 1982 if (nsubs > 1) { 1983 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1984 for (i=0;i<nsubs;i++) { 1985 ISLocalToGlobalMapping l2g; 1986 IS t_zerodiag_subs; 1987 PetscInt nl; 1988 1989 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1990 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1991 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1992 if (nl) { 1993 PetscBool valid = PETSC_TRUE; 1994 1995 if (pcbddc->current_level) { 1996 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1997 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1998 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1999 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2000 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2001 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2002 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2003 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2004 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2005 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2006 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2007 for (j=0;j<n_interior_dofs;j++) { 2008 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2009 valid = PETSC_FALSE; 2010 break; 2011 } 2012 } 2013 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2014 } 2015 if (valid && pcbddc->NeumannBoundariesLocal) { 2016 IS t_bc; 2017 PetscInt nzb; 2018 2019 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 2020 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 2021 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 2022 if (nzb) valid = PETSC_FALSE; 2023 } 2024 if (valid && pressures) { 2025 IS t_pressure_subs; 2026 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2027 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2028 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2029 } 2030 if (valid) { 2031 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2032 pcbddc->benign_n++; 2033 } else { 2034 recompute_zerodiag = PETSC_TRUE; 2035 } 2036 } 2037 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2038 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2039 } 2040 } else { /* there's just one subdomain (or zero if they have not been detected */ 2041 PetscBool valid = PETSC_TRUE; 2042 2043 if (pcbddc->NeumannBoundariesLocal) { 2044 PetscInt nzb; 2045 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 2046 if (nzb) valid = PETSC_FALSE; 2047 } 2048 if (valid && pressures) { 2049 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2050 } 2051 if (valid && pcbddc->current_level) { 2052 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2053 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2054 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2055 for (j=0;j<n_interior_dofs;j++) { 2056 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2057 valid = PETSC_FALSE; 2058 break; 2059 } 2060 } 2061 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2062 } 2063 if (valid) { 2064 pcbddc->benign_n = 1; 2065 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2066 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2067 zerodiag_subs[0] = zerodiag; 2068 } 2069 } 2070 if (pcbddc->current_level) { 2071 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2072 } 2073 } 2074 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2075 2076 if (!pcbddc->benign_n) { 2077 PetscInt n; 2078 2079 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2080 recompute_zerodiag = PETSC_FALSE; 2081 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2082 if (n) { 2083 has_null_pressures = PETSC_FALSE; 2084 have_null = PETSC_FALSE; 2085 } 2086 } 2087 2088 /* final check for null pressures */ 2089 if (zerodiag && pressures) { 2090 PetscInt nz,np; 2091 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2092 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2093 if (nz != np) have_null = PETSC_FALSE; 2094 } 2095 2096 if (recompute_zerodiag) { 2097 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2098 if (pcbddc->benign_n == 1) { 2099 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2100 zerodiag = zerodiag_subs[0]; 2101 } else { 2102 PetscInt i,nzn,*new_idxs; 2103 2104 nzn = 0; 2105 for (i=0;i<pcbddc->benign_n;i++) { 2106 PetscInt ns; 2107 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2108 nzn += ns; 2109 } 2110 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2111 nzn = 0; 2112 for (i=0;i<pcbddc->benign_n;i++) { 2113 PetscInt ns,*idxs; 2114 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2115 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2116 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2117 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2118 nzn += ns; 2119 } 2120 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2121 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2122 } 2123 have_null = PETSC_FALSE; 2124 } 2125 2126 /* Prepare matrix to compute no-net-flux */ 2127 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2128 Mat A,loc_divudotp; 2129 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2130 IS row,col,isused = NULL; 2131 PetscInt M,N,n,st,n_isused; 2132 2133 if (pressures) { 2134 isused = pressures; 2135 } else { 2136 isused = zerodiag; 2137 } 2138 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2139 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2140 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2141 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2142 n_isused = 0; 2143 if (isused) { 2144 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2145 } 2146 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2147 st = st-n_isused; 2148 if (n) { 2149 const PetscInt *gidxs; 2150 2151 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2152 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2153 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2154 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2155 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2156 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2157 } else { 2158 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2159 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2160 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2161 } 2162 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2163 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2164 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2165 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2166 ierr = ISDestroy(&row);CHKERRQ(ierr); 2167 ierr = ISDestroy(&col);CHKERRQ(ierr); 2168 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2169 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2170 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2171 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2172 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2173 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2174 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2175 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2176 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 } 2179 2180 /* change of basis and p0 dofs */ 2181 if (has_null_pressures) { 2182 IS zerodiagc; 2183 const PetscInt *idxs,*idxsc; 2184 PetscInt i,s,*nnz; 2185 2186 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2187 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2188 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2189 /* local change of basis for pressures */ 2190 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2191 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2192 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2193 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2194 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2195 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2196 for (i=0;i<pcbddc->benign_n;i++) { 2197 PetscInt nzs,j; 2198 2199 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2200 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2201 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2202 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2203 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2204 } 2205 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2206 ierr = PetscFree(nnz);CHKERRQ(ierr); 2207 /* set identity on velocities */ 2208 for (i=0;i<n-nz;i++) { 2209 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2210 } 2211 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2212 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2213 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2214 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2215 /* set change on pressures */ 2216 for (s=0;s<pcbddc->benign_n;s++) { 2217 PetscScalar *array; 2218 PetscInt nzs; 2219 2220 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2221 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2222 for (i=0;i<nzs-1;i++) { 2223 PetscScalar vals[2]; 2224 PetscInt cols[2]; 2225 2226 cols[0] = idxs[i]; 2227 cols[1] = idxs[nzs-1]; 2228 vals[0] = 1.; 2229 vals[1] = 1.; 2230 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2231 } 2232 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2233 for (i=0;i<nzs-1;i++) array[i] = -1.; 2234 array[nzs-1] = 1.; 2235 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2236 /* store local idxs for p0 */ 2237 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2238 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2239 ierr = PetscFree(array);CHKERRQ(ierr); 2240 } 2241 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2242 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2243 /* project if needed */ 2244 if (pcbddc->benign_change_explicit) { 2245 Mat M; 2246 2247 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2248 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2249 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2250 ierr = MatDestroy(&M);CHKERRQ(ierr); 2251 } 2252 /* store global idxs for p0 */ 2253 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2254 } 2255 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2256 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2257 2258 /* determines if the coarse solver will be singular or not */ 2259 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2260 /* determines if the problem has subdomains with 0 pressure block */ 2261 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2262 *zerodiaglocal = zerodiag; 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2268 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2269 { 2270 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2271 PetscScalar *array; 2272 PetscErrorCode ierr; 2273 2274 PetscFunctionBegin; 2275 if (!pcbddc->benign_sf) { 2276 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2277 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2278 } 2279 if (get) { 2280 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2281 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2282 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2283 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2284 } else { 2285 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2286 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2287 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2288 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2295 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2296 { 2297 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 /* TODO: add error checking 2302 - avoid nested pop (or push) calls. 2303 - cannot push before pop. 2304 - cannot call this if pcbddc->local_mat is NULL 2305 */ 2306 if (!pcbddc->benign_n) { 2307 PetscFunctionReturn(0); 2308 } 2309 if (pop) { 2310 if (pcbddc->benign_change_explicit) { 2311 IS is_p0; 2312 MatReuse reuse; 2313 2314 /* extract B_0 */ 2315 reuse = MAT_INITIAL_MATRIX; 2316 if (pcbddc->benign_B0) { 2317 reuse = MAT_REUSE_MATRIX; 2318 } 2319 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2320 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2321 /* remove rows and cols from local problem */ 2322 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2323 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2324 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2325 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2326 } else { 2327 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2328 PetscScalar *vals; 2329 PetscInt i,n,*idxs_ins; 2330 2331 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2332 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2333 if (!pcbddc->benign_B0) { 2334 PetscInt *nnz; 2335 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2336 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2337 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2338 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2339 for (i=0;i<pcbddc->benign_n;i++) { 2340 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2341 nnz[i] = n - nnz[i]; 2342 } 2343 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2344 ierr = PetscFree(nnz);CHKERRQ(ierr); 2345 } 2346 2347 for (i=0;i<pcbddc->benign_n;i++) { 2348 PetscScalar *array; 2349 PetscInt *idxs,j,nz,cum; 2350 2351 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2352 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2353 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2354 for (j=0;j<nz;j++) vals[j] = 1.; 2355 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2356 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2357 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2358 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2359 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2360 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2361 cum = 0; 2362 for (j=0;j<n;j++) { 2363 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2364 vals[cum] = array[j]; 2365 idxs_ins[cum] = j; 2366 cum++; 2367 } 2368 } 2369 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2370 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2371 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2372 } 2373 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2374 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2375 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2376 } 2377 } else { /* push */ 2378 if (pcbddc->benign_change_explicit) { 2379 PetscInt i; 2380 2381 for (i=0;i<pcbddc->benign_n;i++) { 2382 PetscScalar *B0_vals; 2383 PetscInt *B0_cols,B0_ncol; 2384 2385 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2386 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2387 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2388 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2389 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2390 } 2391 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2392 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 } else { 2394 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2395 } 2396 } 2397 PetscFunctionReturn(0); 2398 } 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2402 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2403 { 2404 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2405 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2406 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2407 PetscBLASInt *B_iwork,*B_ifail; 2408 PetscScalar *work,lwork; 2409 PetscScalar *St,*S,*eigv; 2410 PetscScalar *Sarray,*Starray; 2411 PetscReal *eigs,thresh; 2412 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2413 PetscBool allocated_S_St; 2414 #if defined(PETSC_USE_COMPLEX) 2415 PetscReal *rwork; 2416 #endif 2417 PetscErrorCode ierr; 2418 2419 PetscFunctionBegin; 2420 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2421 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2422 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2423 2424 if (pcbddc->dbg_flag) { 2425 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2426 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2427 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2428 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2429 } 2430 2431 if (pcbddc->dbg_flag) { 2432 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2433 } 2434 2435 /* max size of subsets */ 2436 mss = 0; 2437 for (i=0;i<sub_schurs->n_subs;i++) { 2438 PetscInt subset_size; 2439 2440 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2441 mss = PetscMax(mss,subset_size); 2442 } 2443 2444 /* min/max and threshold */ 2445 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2446 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2447 nmax = PetscMax(nmin,nmax); 2448 allocated_S_St = PETSC_FALSE; 2449 if (nmin) { 2450 allocated_S_St = PETSC_TRUE; 2451 } 2452 2453 /* allocate lapack workspace */ 2454 cum = cum2 = 0; 2455 maxneigs = 0; 2456 for (i=0;i<sub_schurs->n_subs;i++) { 2457 PetscInt n,subset_size; 2458 2459 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2460 n = PetscMin(subset_size,nmax); 2461 cum += subset_size; 2462 cum2 += subset_size*n; 2463 maxneigs = PetscMax(maxneigs,n); 2464 } 2465 if (mss) { 2466 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2467 PetscBLASInt B_itype = 1; 2468 PetscBLASInt B_N = mss; 2469 PetscReal zero = 0.0; 2470 PetscReal eps = 0.0; /* dlamch? */ 2471 2472 B_lwork = -1; 2473 S = NULL; 2474 St = NULL; 2475 eigs = NULL; 2476 eigv = NULL; 2477 B_iwork = NULL; 2478 B_ifail = NULL; 2479 #if defined(PETSC_USE_COMPLEX) 2480 rwork = NULL; 2481 #endif 2482 thresh = 1.0; 2483 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2484 #if defined(PETSC_USE_COMPLEX) 2485 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2486 #else 2487 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2488 #endif 2489 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2490 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2491 } else { 2492 /* TODO */ 2493 } 2494 } else { 2495 lwork = 0; 2496 } 2497 2498 nv = 0; 2499 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2500 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2501 } 2502 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2503 if (allocated_S_St) { 2504 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2505 } 2506 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2507 #if defined(PETSC_USE_COMPLEX) 2508 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2509 #endif 2510 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2511 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2512 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2513 nv+cum,&pcbddc->adaptive_constraints_idxs, 2514 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2515 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2516 2517 maxneigs = 0; 2518 cum = cumarray = 0; 2519 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2520 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2521 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2522 const PetscInt *idxs; 2523 2524 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2525 for (cum=0;cum<nv;cum++) { 2526 pcbddc->adaptive_constraints_n[cum] = 1; 2527 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2528 pcbddc->adaptive_constraints_data[cum] = 1.0; 2529 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2530 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2531 } 2532 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2533 } 2534 2535 if (mss) { /* multilevel */ 2536 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2537 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2538 } 2539 2540 thresh = pcbddc->adaptive_threshold; 2541 for (i=0;i<sub_schurs->n_subs;i++) { 2542 const PetscInt *idxs; 2543 PetscReal upper,lower; 2544 PetscInt j,subset_size,eigs_start = 0; 2545 PetscBLASInt B_N; 2546 PetscBool same_data = PETSC_FALSE; 2547 2548 if (pcbddc->use_deluxe_scaling) { 2549 upper = PETSC_MAX_REAL; 2550 lower = thresh; 2551 } else { 2552 upper = 1./thresh; 2553 lower = 0.; 2554 } 2555 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2556 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2557 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2558 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2559 if (sub_schurs->is_hermitian) { 2560 PetscInt j,k; 2561 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2562 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2563 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2564 } 2565 for (j=0;j<subset_size;j++) { 2566 for (k=j;k<subset_size;k++) { 2567 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2568 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2569 } 2570 } 2571 } else { 2572 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2573 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2574 } 2575 } else { 2576 S = Sarray + cumarray; 2577 St = Starray + cumarray; 2578 } 2579 /* see if we can save some work */ 2580 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2581 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2582 } 2583 2584 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2585 B_neigs = 0; 2586 } else { 2587 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2588 PetscBLASInt B_itype = 1; 2589 PetscBLASInt B_IL, B_IU; 2590 PetscReal eps = -1.0; /* dlamch? */ 2591 PetscInt nmin_s; 2592 PetscBool compute_range = PETSC_FALSE; 2593 2594 if (pcbddc->dbg_flag) { 2595 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2596 } 2597 2598 compute_range = PETSC_FALSE; 2599 if (thresh > 1.+PETSC_SMALL && !same_data) { 2600 compute_range = PETSC_TRUE; 2601 } 2602 2603 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2604 if (compute_range) { 2605 2606 /* ask for eigenvalues larger than thresh */ 2607 #if defined(PETSC_USE_COMPLEX) 2608 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2609 #else 2610 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2611 #endif 2612 } else if (!same_data) { 2613 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2614 B_IL = 1; 2615 #if defined(PETSC_USE_COMPLEX) 2616 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2617 #else 2618 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2619 #endif 2620 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2621 PetscInt k; 2622 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2623 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2624 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2625 nmin = nmax; 2626 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2627 for (k=0;k<nmax;k++) { 2628 eigs[k] = 1./PETSC_SMALL; 2629 eigv[k*(subset_size+1)] = 1.0; 2630 } 2631 } 2632 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2633 if (B_ierr) { 2634 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2635 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2636 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2637 } 2638 2639 if (B_neigs > nmax) { 2640 if (pcbddc->dbg_flag) { 2641 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2642 } 2643 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2644 B_neigs = nmax; 2645 } 2646 2647 nmin_s = PetscMin(nmin,B_N); 2648 if (B_neigs < nmin_s) { 2649 PetscBLASInt B_neigs2; 2650 2651 if (pcbddc->use_deluxe_scaling) { 2652 B_IL = B_N - nmin_s + 1; 2653 B_IU = B_N - B_neigs; 2654 } else { 2655 B_IL = B_neigs + 1; 2656 B_IU = nmin_s; 2657 } 2658 if (pcbddc->dbg_flag) { 2659 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2660 } 2661 if (sub_schurs->is_hermitian) { 2662 PetscInt j,k; 2663 for (j=0;j<subset_size;j++) { 2664 for (k=j;k<subset_size;k++) { 2665 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2666 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2667 } 2668 } 2669 } else { 2670 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2671 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2672 } 2673 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2674 #if defined(PETSC_USE_COMPLEX) 2675 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2676 #else 2677 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2678 #endif 2679 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2680 B_neigs += B_neigs2; 2681 } 2682 if (B_ierr) { 2683 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2684 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2685 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2686 } 2687 if (pcbddc->dbg_flag) { 2688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2689 for (j=0;j<B_neigs;j++) { 2690 if (eigs[j] == 0.0) { 2691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2692 } else { 2693 if (pcbddc->use_deluxe_scaling) { 2694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2695 } else { 2696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2697 } 2698 } 2699 } 2700 } 2701 } else { 2702 /* TODO */ 2703 } 2704 } 2705 /* change the basis back to the original one */ 2706 if (sub_schurs->change) { 2707 Mat change,phi,phit; 2708 2709 if (pcbddc->dbg_flag > 1) { 2710 PetscInt ii; 2711 for (ii=0;ii<B_neigs;ii++) { 2712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2713 for (j=0;j<B_N;j++) { 2714 #if defined(PETSC_USE_COMPLEX) 2715 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2716 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2717 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2718 #else 2719 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2720 #endif 2721 } 2722 } 2723 } 2724 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2726 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2727 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2728 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2729 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2730 } 2731 maxneigs = PetscMax(B_neigs,maxneigs); 2732 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2733 if (B_neigs) { 2734 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2735 2736 if (pcbddc->dbg_flag > 1) { 2737 PetscInt ii; 2738 for (ii=0;ii<B_neigs;ii++) { 2739 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2740 for (j=0;j<B_N;j++) { 2741 #if defined(PETSC_USE_COMPLEX) 2742 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2743 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2744 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2745 #else 2746 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2747 #endif 2748 } 2749 } 2750 } 2751 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2752 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2753 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2754 cum++; 2755 } 2756 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2757 /* shift for next computation */ 2758 cumarray += subset_size*subset_size; 2759 } 2760 if (pcbddc->dbg_flag) { 2761 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2762 } 2763 2764 if (mss) { 2765 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2766 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2767 /* destroy matrices (junk) */ 2768 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2769 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2770 } 2771 if (allocated_S_St) { 2772 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2773 } 2774 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2775 #if defined(PETSC_USE_COMPLEX) 2776 ierr = PetscFree(rwork);CHKERRQ(ierr); 2777 #endif 2778 if (pcbddc->dbg_flag) { 2779 PetscInt maxneigs_r; 2780 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2781 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2782 } 2783 PetscFunctionReturn(0); 2784 } 2785 2786 #undef __FUNCT__ 2787 #define __FUNCT__ "PCBDDCSetUpSolvers" 2788 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2789 { 2790 PetscScalar *coarse_submat_vals; 2791 PetscErrorCode ierr; 2792 2793 PetscFunctionBegin; 2794 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2795 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2796 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2797 2798 /* Setup local neumann solver ksp_R */ 2799 /* PCBDDCSetUpLocalScatters should be called first! */ 2800 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2801 2802 /* 2803 Setup local correction and local part of coarse basis. 2804 Gives back the dense local part of the coarse matrix in column major ordering 2805 */ 2806 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2807 2808 /* Compute total number of coarse nodes and setup coarse solver */ 2809 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2810 2811 /* free */ 2812 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2813 PetscFunctionReturn(0); 2814 } 2815 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "PCBDDCResetCustomization" 2818 PetscErrorCode PCBDDCResetCustomization(PC pc) 2819 { 2820 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2821 PetscErrorCode ierr; 2822 2823 PetscFunctionBegin; 2824 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2825 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2826 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2827 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2828 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2829 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2830 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2831 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2832 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2833 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2834 PetscFunctionReturn(0); 2835 } 2836 2837 #undef __FUNCT__ 2838 #define __FUNCT__ "PCBDDCResetTopography" 2839 PetscErrorCode PCBDDCResetTopography(PC pc) 2840 { 2841 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2842 PetscInt i; 2843 PetscErrorCode ierr; 2844 2845 PetscFunctionBegin; 2846 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2847 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2848 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2849 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2850 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2851 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2852 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2853 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2854 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2855 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2856 for (i=0;i<pcbddc->n_local_subs;i++) { 2857 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2858 } 2859 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2860 if (pcbddc->sub_schurs) { 2861 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2862 } 2863 pcbddc->graphanalyzed = PETSC_FALSE; 2864 pcbddc->recompute_topography = PETSC_TRUE; 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "PCBDDCResetSolvers" 2870 PetscErrorCode PCBDDCResetSolvers(PC pc) 2871 { 2872 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2873 PetscErrorCode ierr; 2874 2875 PetscFunctionBegin; 2876 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2877 if (pcbddc->coarse_phi_B) { 2878 PetscScalar *array; 2879 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2880 ierr = PetscFree(array);CHKERRQ(ierr); 2881 } 2882 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2883 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2884 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2885 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2886 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2887 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2888 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2889 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2890 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2891 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2892 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2893 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2894 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2895 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2896 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2897 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2898 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2899 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2900 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2901 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2902 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2903 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2904 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2905 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2906 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2907 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2908 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2909 if (pcbddc->benign_zerodiag_subs) { 2910 PetscInt i; 2911 for (i=0;i<pcbddc->benign_n;i++) { 2912 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2913 } 2914 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2915 } 2916 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2917 PetscFunctionReturn(0); 2918 } 2919 2920 #undef __FUNCT__ 2921 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2922 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2923 { 2924 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2925 PC_IS *pcis = (PC_IS*)pc->data; 2926 VecType impVecType; 2927 PetscInt n_constraints,n_R,old_size; 2928 PetscErrorCode ierr; 2929 2930 PetscFunctionBegin; 2931 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2932 /* get sizes */ 2933 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2934 n_R = pcis->n - pcbddc->n_vertices; 2935 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2936 /* local work vectors (try to avoid unneeded work)*/ 2937 /* R nodes */ 2938 old_size = -1; 2939 if (pcbddc->vec1_R) { 2940 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2941 } 2942 if (n_R != old_size) { 2943 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2944 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2945 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2946 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2947 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2948 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2949 } 2950 /* local primal dofs */ 2951 old_size = -1; 2952 if (pcbddc->vec1_P) { 2953 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2954 } 2955 if (pcbddc->local_primal_size != old_size) { 2956 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2957 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2958 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2959 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2960 } 2961 /* local explicit constraints */ 2962 old_size = -1; 2963 if (pcbddc->vec1_C) { 2964 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2965 } 2966 if (n_constraints && n_constraints != old_size) { 2967 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2968 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2969 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2970 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2971 } 2972 PetscFunctionReturn(0); 2973 } 2974 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "PCBDDCSetUpCorrection" 2977 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2978 { 2979 PetscErrorCode ierr; 2980 /* pointers to pcis and pcbddc */ 2981 PC_IS* pcis = (PC_IS*)pc->data; 2982 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2983 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2984 /* submatrices of local problem */ 2985 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2986 /* submatrices of local coarse problem */ 2987 Mat S_VV,S_CV,S_VC,S_CC; 2988 /* working matrices */ 2989 Mat C_CR; 2990 /* additional working stuff */ 2991 PC pc_R; 2992 Mat F; 2993 Vec dummy_vec; 2994 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2995 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2996 PetscScalar *work; 2997 PetscInt *idx_V_B; 2998 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2999 PetscInt i,n_R,n_D,n_B; 3000 3001 /* some shortcuts to scalars */ 3002 PetscScalar one=1.0,m_one=-1.0; 3003 3004 PetscFunctionBegin; 3005 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3006 3007 /* Set Non-overlapping dimensions */ 3008 n_vertices = pcbddc->n_vertices; 3009 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3010 n_B = pcis->n_B; 3011 n_D = pcis->n - n_B; 3012 n_R = pcis->n - n_vertices; 3013 3014 /* vertices in boundary numbering */ 3015 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3016 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3017 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3018 3019 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3020 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3021 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3022 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3023 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3024 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3025 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3026 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3027 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3028 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3029 3030 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3031 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3032 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3033 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3034 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3035 lda_rhs = n_R; 3036 need_benign_correction = PETSC_FALSE; 3037 if (isLU || isILU || isCHOL) { 3038 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3039 } else if (sub_schurs && sub_schurs->reuse_solver) { 3040 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3041 MatFactorType type; 3042 3043 F = reuse_solver->F; 3044 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3045 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3046 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3047 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3048 } else { 3049 F = NULL; 3050 } 3051 3052 /* allocate workspace */ 3053 n = 0; 3054 if (n_constraints) { 3055 n += lda_rhs*n_constraints; 3056 } 3057 if (n_vertices) { 3058 n = PetscMax(2*lda_rhs*n_vertices,n); 3059 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3060 } 3061 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3062 3063 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3064 dummy_vec = NULL; 3065 if (need_benign_correction && lda_rhs != n_R && F) { 3066 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3067 } 3068 3069 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3070 if (n_constraints) { 3071 Mat M1,M2,M3,C_B; 3072 IS is_aux; 3073 PetscScalar *array,*array2; 3074 3075 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3076 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3077 3078 /* Extract constraints on R nodes: C_{CR} */ 3079 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3080 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3081 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3082 3083 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3084 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3085 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3086 for (i=0;i<n_constraints;i++) { 3087 const PetscScalar *row_cmat_values; 3088 const PetscInt *row_cmat_indices; 3089 PetscInt size_of_constraint,j; 3090 3091 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3092 for (j=0;j<size_of_constraint;j++) { 3093 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3094 } 3095 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3096 } 3097 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3098 if (F) { 3099 Mat B; 3100 3101 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3102 if (need_benign_correction) { 3103 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3104 3105 /* rhs is already zero on interior dofs, no need to change the rhs */ 3106 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3107 } 3108 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3109 if (need_benign_correction) { 3110 PetscScalar *marr; 3111 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3112 3113 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3114 if (lda_rhs != n_R) { 3115 for (i=0;i<n_constraints;i++) { 3116 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3117 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3118 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3119 } 3120 } else { 3121 for (i=0;i<n_constraints;i++) { 3122 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3123 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3124 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3125 } 3126 } 3127 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3128 } 3129 ierr = MatDestroy(&B);CHKERRQ(ierr); 3130 } else { 3131 PetscScalar *marr; 3132 3133 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3134 for (i=0;i<n_constraints;i++) { 3135 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3136 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3137 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3138 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3139 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3140 } 3141 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3142 } 3143 if (!pcbddc->switch_static) { 3144 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3145 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3146 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3147 for (i=0;i<n_constraints;i++) { 3148 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3149 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3150 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3151 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3152 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3153 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3154 } 3155 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3156 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3157 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3158 } else { 3159 if (lda_rhs != n_R) { 3160 IS dummy; 3161 3162 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3163 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3164 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3165 } else { 3166 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3167 pcbddc->local_auxmat2 = local_auxmat2_R; 3168 } 3169 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3170 } 3171 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3172 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3173 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3174 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3175 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3176 if (isCHOL) { 3177 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3178 } else { 3179 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3180 } 3181 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3182 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3183 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3184 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3185 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3186 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3187 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3188 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3189 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3190 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3191 } 3192 3193 /* Get submatrices from subdomain matrix */ 3194 if (n_vertices) { 3195 IS is_aux; 3196 3197 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3198 IS tis; 3199 3200 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3201 ierr = ISSort(tis);CHKERRQ(ierr); 3202 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3203 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3204 } else { 3205 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3206 } 3207 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3208 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3209 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3210 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3211 } 3212 3213 /* Matrix of coarse basis functions (local) */ 3214 if (pcbddc->coarse_phi_B) { 3215 PetscInt on_B,on_primal,on_D=n_D; 3216 if (pcbddc->coarse_phi_D) { 3217 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3218 } 3219 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3220 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3221 PetscScalar *marray; 3222 3223 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3224 ierr = PetscFree(marray);CHKERRQ(ierr); 3225 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3226 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3227 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3228 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3229 } 3230 } 3231 3232 if (!pcbddc->coarse_phi_B) { 3233 PetscScalar *marray; 3234 3235 n = n_B*pcbddc->local_primal_size; 3236 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3237 n += n_D*pcbddc->local_primal_size; 3238 } 3239 if (!pcbddc->symmetric_primal) { 3240 n *= 2; 3241 } 3242 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3243 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3244 n = n_B*pcbddc->local_primal_size; 3245 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3246 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3247 n += n_D*pcbddc->local_primal_size; 3248 } 3249 if (!pcbddc->symmetric_primal) { 3250 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3251 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3252 n = n_B*pcbddc->local_primal_size; 3253 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3254 } 3255 } else { 3256 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3257 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3258 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3259 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3260 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3261 } 3262 } 3263 } 3264 3265 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3266 p0_lidx_I = NULL; 3267 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3268 const PetscInt *idxs; 3269 3270 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3271 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3272 for (i=0;i<pcbddc->benign_n;i++) { 3273 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3274 } 3275 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3276 } 3277 3278 /* vertices */ 3279 if (n_vertices) { 3280 3281 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3282 3283 if (n_R) { 3284 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3285 PetscBLASInt B_N,B_one = 1; 3286 PetscScalar *x,*y; 3287 PetscBool isseqaij; 3288 3289 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3290 if (need_benign_correction) { 3291 ISLocalToGlobalMapping RtoN; 3292 IS is_p0; 3293 PetscInt *idxs_p0,n; 3294 3295 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3296 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3297 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3298 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3299 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3300 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3301 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3302 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3303 } 3304 3305 if (lda_rhs == n_R) { 3306 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3307 } else { 3308 PetscScalar *av,*array; 3309 const PetscInt *xadj,*adjncy; 3310 PetscInt n; 3311 PetscBool flg_row; 3312 3313 array = work+lda_rhs*n_vertices; 3314 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3315 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3316 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3317 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3318 for (i=0;i<n;i++) { 3319 PetscInt j; 3320 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3321 } 3322 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3323 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3324 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3325 } 3326 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3327 if (need_benign_correction) { 3328 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3329 PetscScalar *marr; 3330 3331 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3332 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3333 3334 | 0 0 0 | (V) 3335 L = | 0 0 -1 | (P-p0) 3336 | 0 0 -1 | (p0) 3337 3338 */ 3339 for (i=0;i<reuse_solver->benign_n;i++) { 3340 const PetscScalar *vals; 3341 const PetscInt *idxs,*idxs_zero; 3342 PetscInt n,j,nz; 3343 3344 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3345 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3346 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3347 for (j=0;j<n;j++) { 3348 PetscScalar val = vals[j]; 3349 PetscInt k,col = idxs[j]; 3350 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3351 } 3352 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3353 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3354 } 3355 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3356 } 3357 if (F) { 3358 /* need to correct the rhs */ 3359 if (need_benign_correction) { 3360 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3361 PetscScalar *marr; 3362 3363 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3364 if (lda_rhs != n_R) { 3365 for (i=0;i<n_vertices;i++) { 3366 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3367 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3368 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3369 } 3370 } else { 3371 for (i=0;i<n_vertices;i++) { 3372 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3373 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3374 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3375 } 3376 } 3377 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3378 } 3379 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3380 /* need to correct the solution */ 3381 if (need_benign_correction) { 3382 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3383 PetscScalar *marr; 3384 3385 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3386 if (lda_rhs != n_R) { 3387 for (i=0;i<n_vertices;i++) { 3388 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3389 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3390 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3391 } 3392 } else { 3393 for (i=0;i<n_vertices;i++) { 3394 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3395 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3396 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3397 } 3398 } 3399 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3400 } 3401 } else { 3402 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3403 for (i=0;i<n_vertices;i++) { 3404 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3405 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3406 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3407 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3408 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3409 } 3410 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3411 } 3412 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3413 /* S_VV and S_CV */ 3414 if (n_constraints) { 3415 Mat B; 3416 3417 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3418 for (i=0;i<n_vertices;i++) { 3419 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3420 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3421 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3422 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3423 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3424 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3425 } 3426 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3427 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3428 ierr = MatDestroy(&B);CHKERRQ(ierr); 3429 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3430 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3431 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3432 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3433 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3434 ierr = MatDestroy(&B);CHKERRQ(ierr); 3435 } 3436 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3437 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3438 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3439 } 3440 if (lda_rhs != n_R) { 3441 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3442 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3443 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3444 } 3445 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3446 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3447 if (need_benign_correction) { 3448 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3449 PetscScalar *marr,*sums; 3450 3451 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3452 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3453 for (i=0;i<reuse_solver->benign_n;i++) { 3454 const PetscScalar *vals; 3455 const PetscInt *idxs,*idxs_zero; 3456 PetscInt n,j,nz; 3457 3458 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3459 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3460 for (j=0;j<n_vertices;j++) { 3461 PetscInt k; 3462 sums[j] = 0.; 3463 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3464 } 3465 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3466 for (j=0;j<n;j++) { 3467 PetscScalar val = vals[j]; 3468 PetscInt k; 3469 for (k=0;k<n_vertices;k++) { 3470 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3471 } 3472 } 3473 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3474 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3475 } 3476 ierr = PetscFree(sums);CHKERRQ(ierr); 3477 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3478 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3479 } 3480 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3481 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3482 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3483 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3484 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3485 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3486 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3487 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3488 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3489 } else { 3490 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3491 } 3492 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3493 3494 /* coarse basis functions */ 3495 for (i=0;i<n_vertices;i++) { 3496 PetscScalar *y; 3497 3498 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3499 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3500 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3501 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3502 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3503 y[n_B*i+idx_V_B[i]] = 1.0; 3504 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3505 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3506 3507 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3508 PetscInt j; 3509 3510 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3511 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3512 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3513 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3514 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3515 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3516 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3517 } 3518 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3519 } 3520 /* if n_R == 0 the object is not destroyed */ 3521 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3522 } 3523 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3524 3525 if (n_constraints) { 3526 Mat B; 3527 3528 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3529 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3530 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3531 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3532 if (n_vertices) { 3533 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3534 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3535 } else { 3536 Mat S_VCt; 3537 3538 if (lda_rhs != n_R) { 3539 ierr = MatDestroy(&B);CHKERRQ(ierr); 3540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3541 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3542 } 3543 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3544 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3545 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3546 } 3547 } 3548 ierr = MatDestroy(&B);CHKERRQ(ierr); 3549 /* coarse basis functions */ 3550 for (i=0;i<n_constraints;i++) { 3551 PetscScalar *y; 3552 3553 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3554 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3555 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3556 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3557 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3558 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3559 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3560 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3561 PetscInt j; 3562 3563 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3564 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3565 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3566 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3567 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3568 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3569 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3570 } 3571 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3572 } 3573 } 3574 if (n_constraints) { 3575 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3576 } 3577 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3578 3579 /* coarse matrix entries relative to B_0 */ 3580 if (pcbddc->benign_n) { 3581 Mat B0_B,B0_BPHI; 3582 IS is_dummy; 3583 PetscScalar *data; 3584 PetscInt j; 3585 3586 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3587 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3588 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3589 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3590 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3591 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3592 for (j=0;j<pcbddc->benign_n;j++) { 3593 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3594 for (i=0;i<pcbddc->local_primal_size;i++) { 3595 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3596 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3597 } 3598 } 3599 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3600 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3601 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3602 } 3603 3604 /* compute other basis functions for non-symmetric problems */ 3605 if (!pcbddc->symmetric_primal) { 3606 Mat B_V=NULL,B_C=NULL; 3607 PetscScalar *marray; 3608 3609 if (n_constraints) { 3610 Mat S_CCT,C_CRT; 3611 3612 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3613 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3614 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3615 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3616 if (n_vertices) { 3617 Mat S_VCT; 3618 3619 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3620 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3621 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3622 } 3623 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3624 } else { 3625 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3626 } 3627 if (n_vertices && n_R) { 3628 PetscScalar *av,*marray; 3629 const PetscInt *xadj,*adjncy; 3630 PetscInt n; 3631 PetscBool flg_row; 3632 3633 /* B_V = B_V - A_VR^T */ 3634 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3635 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3636 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3637 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3638 for (i=0;i<n;i++) { 3639 PetscInt j; 3640 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3641 } 3642 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3643 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3644 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3645 } 3646 3647 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3648 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3649 for (i=0;i<n_vertices;i++) { 3650 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3651 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3652 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3653 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3654 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3655 } 3656 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3657 if (B_C) { 3658 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3659 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3660 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3661 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3662 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3664 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3665 } 3666 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3667 } 3668 /* coarse basis functions */ 3669 for (i=0;i<pcbddc->local_primal_size;i++) { 3670 PetscScalar *y; 3671 3672 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3673 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3674 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3675 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3676 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3677 if (i<n_vertices) { 3678 y[n_B*i+idx_V_B[i]] = 1.0; 3679 } 3680 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3681 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3682 3683 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3684 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3685 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3686 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3687 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3688 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3689 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3690 } 3691 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3692 } 3693 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3694 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3695 } 3696 /* free memory */ 3697 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3698 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3699 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3700 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3701 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3702 ierr = PetscFree(work);CHKERRQ(ierr); 3703 if (n_vertices) { 3704 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3705 } 3706 if (n_constraints) { 3707 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3708 } 3709 /* Checking coarse_sub_mat and coarse basis functios */ 3710 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3711 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3712 if (pcbddc->dbg_flag) { 3713 Mat coarse_sub_mat; 3714 Mat AUXMAT,TM1,TM2,TM3,TM4; 3715 Mat coarse_phi_D,coarse_phi_B; 3716 Mat coarse_psi_D,coarse_psi_B; 3717 Mat A_II,A_BB,A_IB,A_BI; 3718 Mat C_B,CPHI; 3719 IS is_dummy; 3720 Vec mones; 3721 MatType checkmattype=MATSEQAIJ; 3722 PetscReal real_value; 3723 3724 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3725 Mat A; 3726 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3727 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3728 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3729 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3730 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3731 ierr = MatDestroy(&A);CHKERRQ(ierr); 3732 } else { 3733 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3734 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3735 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3736 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3737 } 3738 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3739 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3740 if (!pcbddc->symmetric_primal) { 3741 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3742 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3743 } 3744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3745 3746 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3747 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3748 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3749 if (!pcbddc->symmetric_primal) { 3750 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3751 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3752 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3753 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3754 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3755 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3756 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3757 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3758 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3759 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3760 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3761 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3762 } else { 3763 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3764 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3765 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3766 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3767 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3768 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3769 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3770 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3771 } 3772 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3773 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3774 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3775 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3776 if (pcbddc->benign_n) { 3777 Mat B0_B,B0_BPHI; 3778 PetscScalar *data,*data2; 3779 PetscInt j; 3780 3781 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3782 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3783 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3784 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3785 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3786 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3787 for (j=0;j<pcbddc->benign_n;j++) { 3788 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3789 for (i=0;i<pcbddc->local_primal_size;i++) { 3790 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3791 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3792 } 3793 } 3794 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3795 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3796 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3797 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3798 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3799 } 3800 #if 0 3801 { 3802 PetscViewer viewer; 3803 char filename[256]; 3804 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3805 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3806 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3807 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3808 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3809 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3810 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3811 if (save_change) { 3812 Mat phi_B; 3813 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3814 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3815 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3816 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3817 } else { 3818 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3819 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3820 } 3821 if (pcbddc->coarse_phi_D) { 3822 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3823 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3824 } 3825 if (pcbddc->coarse_psi_B) { 3826 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3827 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3828 } 3829 if (pcbddc->coarse_psi_D) { 3830 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3831 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3832 } 3833 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3834 } 3835 #endif 3836 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3837 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3838 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3839 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3840 3841 /* check constraints */ 3842 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3843 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3844 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3845 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3846 } else { 3847 PetscScalar *data; 3848 Mat tmat; 3849 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3850 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3851 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3852 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3853 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3854 } 3855 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3856 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3857 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3858 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3859 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3860 if (!pcbddc->symmetric_primal) { 3861 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3862 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3863 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3864 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3865 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3866 } 3867 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3868 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3869 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3870 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3871 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3872 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3873 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3874 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3875 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3876 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3877 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3878 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3879 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3880 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3881 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3882 if (!pcbddc->symmetric_primal) { 3883 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3884 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3885 } 3886 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3887 } 3888 /* get back data */ 3889 *coarse_submat_vals_n = coarse_submat_vals; 3890 PetscFunctionReturn(0); 3891 } 3892 3893 #undef __FUNCT__ 3894 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3895 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3896 { 3897 Mat *work_mat; 3898 IS isrow_s,iscol_s; 3899 PetscBool rsorted,csorted; 3900 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3901 PetscErrorCode ierr; 3902 3903 PetscFunctionBegin; 3904 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3905 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3906 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3907 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3908 3909 if (!rsorted) { 3910 const PetscInt *idxs; 3911 PetscInt *idxs_sorted,i; 3912 3913 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3914 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3915 for (i=0;i<rsize;i++) { 3916 idxs_perm_r[i] = i; 3917 } 3918 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3919 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3920 for (i=0;i<rsize;i++) { 3921 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3922 } 3923 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3924 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3925 } else { 3926 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3927 isrow_s = isrow; 3928 } 3929 3930 if (!csorted) { 3931 if (isrow == iscol) { 3932 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3933 iscol_s = isrow_s; 3934 } else { 3935 const PetscInt *idxs; 3936 PetscInt *idxs_sorted,i; 3937 3938 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3939 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3940 for (i=0;i<csize;i++) { 3941 idxs_perm_c[i] = i; 3942 } 3943 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3944 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3945 for (i=0;i<csize;i++) { 3946 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3947 } 3948 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3949 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3950 } 3951 } else { 3952 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3953 iscol_s = iscol; 3954 } 3955 3956 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3957 3958 if (!rsorted || !csorted) { 3959 Mat new_mat; 3960 IS is_perm_r,is_perm_c; 3961 3962 if (!rsorted) { 3963 PetscInt *idxs_r,i; 3964 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3965 for (i=0;i<rsize;i++) { 3966 idxs_r[idxs_perm_r[i]] = i; 3967 } 3968 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3969 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3970 } else { 3971 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3972 } 3973 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3974 3975 if (!csorted) { 3976 if (isrow_s == iscol_s) { 3977 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3978 is_perm_c = is_perm_r; 3979 } else { 3980 PetscInt *idxs_c,i; 3981 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 3982 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3983 for (i=0;i<csize;i++) { 3984 idxs_c[idxs_perm_c[i]] = i; 3985 } 3986 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3987 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3988 } 3989 } else { 3990 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3991 } 3992 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3993 3994 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3995 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3996 work_mat[0] = new_mat; 3997 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3998 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3999 } 4000 4001 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4002 *B = work_mat[0]; 4003 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4004 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4005 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4006 PetscFunctionReturn(0); 4007 } 4008 4009 #undef __FUNCT__ 4010 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4011 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4012 { 4013 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4014 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4015 Mat new_mat; 4016 IS is_local,is_global; 4017 PetscInt local_size; 4018 PetscBool isseqaij; 4019 PetscErrorCode ierr; 4020 4021 PetscFunctionBegin; 4022 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4023 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4024 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4025 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4026 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4027 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4028 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4029 4030 /* check */ 4031 if (pcbddc->dbg_flag) { 4032 Vec x,x_change; 4033 PetscReal error; 4034 4035 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4036 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4037 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4038 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4039 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4040 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4041 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4042 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4043 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4044 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4045 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 4047 ierr = VecDestroy(&x);CHKERRQ(ierr); 4048 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4049 } 4050 4051 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4052 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4053 if (isseqaij) { 4054 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4055 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4056 } else { 4057 Mat work_mat; 4058 4059 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4060 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4061 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4062 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4063 } 4064 if (matis->A->symmetric_set) { 4065 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4066 #if !defined(PETSC_USE_COMPLEX) 4067 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4068 #endif 4069 } 4070 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4071 PetscFunctionReturn(0); 4072 } 4073 4074 #undef __FUNCT__ 4075 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4076 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4077 { 4078 PC_IS* pcis = (PC_IS*)(pc->data); 4079 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4080 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4081 PetscInt *idx_R_local=NULL; 4082 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4083 PetscInt vbs,bs; 4084 PetscBT bitmask=NULL; 4085 PetscErrorCode ierr; 4086 4087 PetscFunctionBegin; 4088 /* 4089 No need to setup local scatters if 4090 - primal space is unchanged 4091 AND 4092 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4093 AND 4094 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4095 */ 4096 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4097 PetscFunctionReturn(0); 4098 } 4099 /* destroy old objects */ 4100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4103 /* Set Non-overlapping dimensions */ 4104 n_B = pcis->n_B; 4105 n_D = pcis->n - n_B; 4106 n_vertices = pcbddc->n_vertices; 4107 4108 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4109 4110 /* create auxiliary bitmask and allocate workspace */ 4111 if (!sub_schurs || !sub_schurs->reuse_solver) { 4112 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4113 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4114 for (i=0;i<n_vertices;i++) { 4115 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4116 } 4117 4118 for (i=0, n_R=0; i<pcis->n; i++) { 4119 if (!PetscBTLookup(bitmask,i)) { 4120 idx_R_local[n_R++] = i; 4121 } 4122 } 4123 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4124 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4125 4126 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4127 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4128 } 4129 4130 /* Block code */ 4131 vbs = 1; 4132 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4133 if (bs>1 && !(n_vertices%bs)) { 4134 PetscBool is_blocked = PETSC_TRUE; 4135 PetscInt *vary; 4136 if (!sub_schurs || !sub_schurs->reuse_solver) { 4137 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4138 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4139 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4140 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4141 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4142 for (i=0; i<pcis->n/bs; i++) { 4143 if (vary[i]!=0 && vary[i]!=bs) { 4144 is_blocked = PETSC_FALSE; 4145 break; 4146 } 4147 } 4148 ierr = PetscFree(vary);CHKERRQ(ierr); 4149 } else { 4150 /* Verify directly the R set */ 4151 for (i=0; i<n_R/bs; i++) { 4152 PetscInt j,node=idx_R_local[bs*i]; 4153 for (j=1; j<bs; j++) { 4154 if (node != idx_R_local[bs*i+j]-j) { 4155 is_blocked = PETSC_FALSE; 4156 break; 4157 } 4158 } 4159 } 4160 } 4161 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4162 vbs = bs; 4163 for (i=0;i<n_R/vbs;i++) { 4164 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4165 } 4166 } 4167 } 4168 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4169 if (sub_schurs && sub_schurs->reuse_solver) { 4170 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4171 4172 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4173 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4174 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4175 reuse_solver->is_R = pcbddc->is_R_local; 4176 } else { 4177 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4178 } 4179 4180 /* print some info if requested */ 4181 if (pcbddc->dbg_flag) { 4182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4183 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4184 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4187 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4188 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4189 } 4190 4191 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4192 if (!sub_schurs || !sub_schurs->reuse_solver) { 4193 IS is_aux1,is_aux2; 4194 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4195 4196 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4197 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4198 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4199 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4200 for (i=0; i<n_D; i++) { 4201 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4202 } 4203 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4204 for (i=0, j=0; i<n_R; i++) { 4205 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4206 aux_array1[j++] = i; 4207 } 4208 } 4209 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4210 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4211 for (i=0, j=0; i<n_B; i++) { 4212 if (!PetscBTLookup(bitmask,is_indices[i])) { 4213 aux_array2[j++] = i; 4214 } 4215 } 4216 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4217 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4218 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4219 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4220 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4221 4222 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4223 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4224 for (i=0, j=0; i<n_R; i++) { 4225 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4226 aux_array1[j++] = i; 4227 } 4228 } 4229 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4230 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4231 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4232 } 4233 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4234 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4235 } else { 4236 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4237 IS tis; 4238 PetscInt schur_size; 4239 4240 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4241 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4242 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4243 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4244 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4245 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4246 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4247 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4248 } 4249 } 4250 PetscFunctionReturn(0); 4251 } 4252 4253 4254 #undef __FUNCT__ 4255 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4256 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4257 { 4258 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4259 PC_IS *pcis = (PC_IS*)pc->data; 4260 PC pc_temp; 4261 Mat A_RR; 4262 MatReuse reuse; 4263 PetscScalar m_one = -1.0; 4264 PetscReal value; 4265 PetscInt n_D,n_R; 4266 PetscBool check_corr[2],issbaij; 4267 PetscErrorCode ierr; 4268 /* prefixes stuff */ 4269 char dir_prefix[256],neu_prefix[256],str_level[16]; 4270 size_t len; 4271 4272 PetscFunctionBegin; 4273 4274 /* compute prefixes */ 4275 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4276 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4277 if (!pcbddc->current_level) { 4278 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4279 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4280 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4281 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4282 } else { 4283 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4284 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4285 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4286 len -= 15; /* remove "pc_bddc_coarse_" */ 4287 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4288 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4289 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4290 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4291 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4292 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4293 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4294 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4295 } 4296 4297 /* DIRICHLET PROBLEM */ 4298 if (dirichlet) { 4299 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4300 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4301 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4302 if (pcbddc->dbg_flag) { 4303 Mat A_IIn; 4304 4305 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4306 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4307 pcis->A_II = A_IIn; 4308 } 4309 } 4310 if (pcbddc->local_mat->symmetric_set) { 4311 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4312 } 4313 /* Matrix for Dirichlet problem is pcis->A_II */ 4314 n_D = pcis->n - pcis->n_B; 4315 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4316 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4317 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4318 /* default */ 4319 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4320 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4321 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4322 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4323 if (issbaij) { 4324 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4325 } else { 4326 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4327 } 4328 /* Allow user's customization */ 4329 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4330 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4331 } 4332 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4333 if (sub_schurs && sub_schurs->reuse_solver) { 4334 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4335 4336 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4337 } 4338 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4339 if (!n_D) { 4340 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4341 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4342 } 4343 /* Set Up KSP for Dirichlet problem of BDDC */ 4344 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4345 /* set ksp_D into pcis data */ 4346 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4347 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4348 pcis->ksp_D = pcbddc->ksp_D; 4349 } 4350 4351 /* NEUMANN PROBLEM */ 4352 A_RR = 0; 4353 if (neumann) { 4354 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4355 PetscInt ibs,mbs; 4356 PetscBool issbaij; 4357 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4358 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4359 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4360 if (pcbddc->ksp_R) { /* already created ksp */ 4361 PetscInt nn_R; 4362 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4363 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4364 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4365 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4366 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4367 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4368 reuse = MAT_INITIAL_MATRIX; 4369 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4370 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4371 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4372 reuse = MAT_INITIAL_MATRIX; 4373 } else { /* safe to reuse the matrix */ 4374 reuse = MAT_REUSE_MATRIX; 4375 } 4376 } 4377 /* last check */ 4378 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4379 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4380 reuse = MAT_INITIAL_MATRIX; 4381 } 4382 } else { /* first time, so we need to create the matrix */ 4383 reuse = MAT_INITIAL_MATRIX; 4384 } 4385 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4386 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4387 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4388 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4389 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4390 if (matis->A == pcbddc->local_mat) { 4391 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4392 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4393 } else { 4394 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4395 } 4396 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4397 if (matis->A == pcbddc->local_mat) { 4398 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4399 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4400 } else { 4401 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4402 } 4403 } 4404 /* extract A_RR */ 4405 if (sub_schurs && sub_schurs->reuse_solver) { 4406 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4407 4408 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4409 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4410 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4411 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4412 } else { 4413 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4414 } 4415 } else { 4416 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4417 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4418 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4419 } 4420 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4421 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4422 } 4423 if (pcbddc->local_mat->symmetric_set) { 4424 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4425 } 4426 if (!pcbddc->ksp_R) { /* create object if not present */ 4427 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4428 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4429 /* default */ 4430 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4431 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4432 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4433 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4434 if (issbaij) { 4435 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4436 } else { 4437 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4438 } 4439 /* Allow user's customization */ 4440 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4441 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4442 } 4443 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4444 if (!n_R) { 4445 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4446 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4447 } 4448 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4449 /* Reuse solver if it is present */ 4450 if (sub_schurs && sub_schurs->reuse_solver) { 4451 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4452 4453 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4454 } 4455 /* Set Up KSP for Neumann problem of BDDC */ 4456 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4457 } 4458 4459 if (pcbddc->dbg_flag) { 4460 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4461 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4462 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4463 } 4464 4465 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4466 check_corr[0] = check_corr[1] = PETSC_FALSE; 4467 if (pcbddc->NullSpace_corr[0]) { 4468 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4469 } 4470 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4471 check_corr[0] = PETSC_TRUE; 4472 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4473 } 4474 if (neumann && pcbddc->NullSpace_corr[2]) { 4475 check_corr[1] = PETSC_TRUE; 4476 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4477 } 4478 4479 /* check Dirichlet and Neumann solvers */ 4480 if (pcbddc->dbg_flag) { 4481 if (dirichlet) { /* Dirichlet */ 4482 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4483 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4484 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4485 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4486 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4488 if (check_corr[0]) { 4489 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4490 } 4491 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4492 } 4493 if (neumann) { /* Neumann */ 4494 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4495 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4496 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4497 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4498 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4499 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4500 if (check_corr[1]) { 4501 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4502 } 4503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4504 } 4505 } 4506 /* free Neumann problem's matrix */ 4507 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4508 PetscFunctionReturn(0); 4509 } 4510 4511 #undef __FUNCT__ 4512 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4513 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4514 { 4515 PetscErrorCode ierr; 4516 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4517 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4518 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4519 4520 PetscFunctionBegin; 4521 if (!reuse_solver) { 4522 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4523 } 4524 if (!pcbddc->switch_static) { 4525 if (applytranspose && pcbddc->local_auxmat1) { 4526 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4527 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4528 } 4529 if (!reuse_solver) { 4530 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4531 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4532 } else { 4533 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4534 4535 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4536 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4537 } 4538 } else { 4539 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4540 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4541 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4542 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4543 if (applytranspose && pcbddc->local_auxmat1) { 4544 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4545 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4546 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4547 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4548 } 4549 } 4550 if (!reuse_solver || pcbddc->switch_static) { 4551 if (applytranspose) { 4552 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4553 } else { 4554 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4555 } 4556 } else { 4557 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4558 4559 if (applytranspose) { 4560 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4561 } else { 4562 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4563 } 4564 } 4565 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4566 if (!pcbddc->switch_static) { 4567 if (!reuse_solver) { 4568 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4569 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4570 } else { 4571 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4572 4573 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4574 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4575 } 4576 if (!applytranspose && pcbddc->local_auxmat1) { 4577 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4578 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4579 } 4580 } else { 4581 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4582 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4583 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4584 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4585 if (!applytranspose && pcbddc->local_auxmat1) { 4586 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4587 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4588 } 4589 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4590 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4591 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4592 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4593 } 4594 PetscFunctionReturn(0); 4595 } 4596 4597 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4598 #undef __FUNCT__ 4599 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4600 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4601 { 4602 PetscErrorCode ierr; 4603 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4604 PC_IS* pcis = (PC_IS*) (pc->data); 4605 const PetscScalar zero = 0.0; 4606 4607 PetscFunctionBegin; 4608 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4609 if (!pcbddc->benign_apply_coarse_only) { 4610 if (applytranspose) { 4611 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4612 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4613 } else { 4614 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4615 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4616 } 4617 } else { 4618 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4619 } 4620 4621 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4622 if (pcbddc->benign_n) { 4623 PetscScalar *array; 4624 PetscInt j; 4625 4626 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4627 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4628 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4629 } 4630 4631 /* start communications from local primal nodes to rhs of coarse solver */ 4632 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4633 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4634 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4635 4636 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4637 if (pcbddc->coarse_ksp) { 4638 Mat coarse_mat; 4639 Vec rhs,sol; 4640 MatNullSpace nullsp; 4641 PetscBool isbddc = PETSC_FALSE; 4642 4643 if (pcbddc->benign_have_null) { 4644 PC coarse_pc; 4645 4646 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4647 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4648 /* we need to propagate to coarser levels the need for a possible benign correction */ 4649 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4650 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4651 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4652 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4653 } 4654 } 4655 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4656 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4657 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4658 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4659 if (nullsp) { 4660 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4661 } 4662 if (applytranspose) { 4663 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4664 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4665 } else { 4666 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4667 PC coarse_pc; 4668 4669 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4670 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4671 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4672 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4673 } else { 4674 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4675 } 4676 } 4677 /* we don't need the benign correction at coarser levels anymore */ 4678 if (pcbddc->benign_have_null && isbddc) { 4679 PC coarse_pc; 4680 PC_BDDC* coarsepcbddc; 4681 4682 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4683 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4684 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4685 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4686 } 4687 if (nullsp) { 4688 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4689 } 4690 } 4691 4692 /* Local solution on R nodes */ 4693 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4694 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4695 } 4696 /* communications from coarse sol to local primal nodes */ 4697 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4698 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4699 4700 /* Sum contributions from the two levels */ 4701 if (!pcbddc->benign_apply_coarse_only) { 4702 if (applytranspose) { 4703 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4704 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4705 } else { 4706 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4707 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4708 } 4709 /* store p0 */ 4710 if (pcbddc->benign_n) { 4711 PetscScalar *array; 4712 PetscInt j; 4713 4714 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4715 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4716 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4717 } 4718 } else { /* expand the coarse solution */ 4719 if (applytranspose) { 4720 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4721 } else { 4722 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4723 } 4724 } 4725 PetscFunctionReturn(0); 4726 } 4727 4728 #undef __FUNCT__ 4729 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4730 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4731 { 4732 PetscErrorCode ierr; 4733 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4734 PetscScalar *array; 4735 Vec from,to; 4736 4737 PetscFunctionBegin; 4738 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4739 from = pcbddc->coarse_vec; 4740 to = pcbddc->vec1_P; 4741 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4742 Vec tvec; 4743 4744 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4745 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4746 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4747 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4748 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4749 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4750 } 4751 } else { /* from local to global -> put data in coarse right hand side */ 4752 from = pcbddc->vec1_P; 4753 to = pcbddc->coarse_vec; 4754 } 4755 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4756 PetscFunctionReturn(0); 4757 } 4758 4759 #undef __FUNCT__ 4760 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4761 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4762 { 4763 PetscErrorCode ierr; 4764 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4765 PetscScalar *array; 4766 Vec from,to; 4767 4768 PetscFunctionBegin; 4769 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4770 from = pcbddc->coarse_vec; 4771 to = pcbddc->vec1_P; 4772 } else { /* from local to global -> put data in coarse right hand side */ 4773 from = pcbddc->vec1_P; 4774 to = pcbddc->coarse_vec; 4775 } 4776 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4777 if (smode == SCATTER_FORWARD) { 4778 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4779 Vec tvec; 4780 4781 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4782 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4783 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4784 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4785 } 4786 } else { 4787 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4788 ierr = VecResetArray(from);CHKERRQ(ierr); 4789 } 4790 } 4791 PetscFunctionReturn(0); 4792 } 4793 4794 /* uncomment for testing purposes */ 4795 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4796 #undef __FUNCT__ 4797 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4798 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4799 { 4800 PetscErrorCode ierr; 4801 PC_IS* pcis = (PC_IS*)(pc->data); 4802 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4803 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4804 /* one and zero */ 4805 PetscScalar one=1.0,zero=0.0; 4806 /* space to store constraints and their local indices */ 4807 PetscScalar *constraints_data; 4808 PetscInt *constraints_idxs,*constraints_idxs_B; 4809 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4810 PetscInt *constraints_n; 4811 /* iterators */ 4812 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4813 /* BLAS integers */ 4814 PetscBLASInt lwork,lierr; 4815 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4816 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4817 /* reuse */ 4818 PetscInt olocal_primal_size,olocal_primal_size_cc; 4819 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4820 /* change of basis */ 4821 PetscBool qr_needed; 4822 PetscBT change_basis,qr_needed_idx; 4823 /* auxiliary stuff */ 4824 PetscInt *nnz,*is_indices; 4825 PetscInt ncc; 4826 /* some quantities */ 4827 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4828 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4829 4830 PetscFunctionBegin; 4831 /* Destroy Mat objects computed previously */ 4832 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4833 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4834 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4835 /* save info on constraints from previous setup (if any) */ 4836 olocal_primal_size = pcbddc->local_primal_size; 4837 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4838 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4839 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4840 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4841 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4842 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4843 4844 if (!pcbddc->adaptive_selection) { 4845 IS ISForVertices,*ISForFaces,*ISForEdges; 4846 MatNullSpace nearnullsp; 4847 const Vec *nearnullvecs; 4848 Vec *localnearnullsp; 4849 PetscScalar *array; 4850 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4851 PetscBool nnsp_has_cnst; 4852 /* LAPACK working arrays for SVD or POD */ 4853 PetscBool skip_lapack,boolforchange; 4854 PetscScalar *work; 4855 PetscReal *singular_vals; 4856 #if defined(PETSC_USE_COMPLEX) 4857 PetscReal *rwork; 4858 #endif 4859 #if defined(PETSC_MISSING_LAPACK_GESVD) 4860 PetscScalar *temp_basis,*correlation_mat; 4861 #else 4862 PetscBLASInt dummy_int=1; 4863 PetscScalar dummy_scalar=1.; 4864 #endif 4865 4866 /* Get index sets for faces, edges and vertices from graph */ 4867 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4868 /* print some info */ 4869 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 4870 PetscInt nv; 4871 4872 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4873 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4874 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4875 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4876 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4879 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4880 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4881 } 4882 4883 /* free unneeded index sets */ 4884 if (!pcbddc->use_vertices) { 4885 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4886 } 4887 if (!pcbddc->use_edges) { 4888 for (i=0;i<n_ISForEdges;i++) { 4889 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4890 } 4891 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4892 n_ISForEdges = 0; 4893 } 4894 if (!pcbddc->use_faces) { 4895 for (i=0;i<n_ISForFaces;i++) { 4896 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4897 } 4898 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4899 n_ISForFaces = 0; 4900 } 4901 4902 /* check if near null space is attached to global mat */ 4903 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4904 if (nearnullsp) { 4905 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4906 /* remove any stored info */ 4907 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4908 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4909 /* store information for BDDC solver reuse */ 4910 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4911 pcbddc->onearnullspace = nearnullsp; 4912 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4913 for (i=0;i<nnsp_size;i++) { 4914 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4915 } 4916 } else { /* if near null space is not provided BDDC uses constants by default */ 4917 nnsp_size = 0; 4918 nnsp_has_cnst = PETSC_TRUE; 4919 } 4920 /* get max number of constraints on a single cc */ 4921 max_constraints = nnsp_size; 4922 if (nnsp_has_cnst) max_constraints++; 4923 4924 /* 4925 Evaluate maximum storage size needed by the procedure 4926 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4927 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4928 There can be multiple constraints per connected component 4929 */ 4930 n_vertices = 0; 4931 if (ISForVertices) { 4932 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4933 } 4934 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4935 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4936 4937 total_counts = n_ISForFaces+n_ISForEdges; 4938 total_counts *= max_constraints; 4939 total_counts += n_vertices; 4940 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4941 4942 total_counts = 0; 4943 max_size_of_constraint = 0; 4944 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4945 IS used_is; 4946 if (i<n_ISForEdges) { 4947 used_is = ISForEdges[i]; 4948 } else { 4949 used_is = ISForFaces[i-n_ISForEdges]; 4950 } 4951 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4952 total_counts += j; 4953 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4954 } 4955 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4956 4957 /* get local part of global near null space vectors */ 4958 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4959 for (k=0;k<nnsp_size;k++) { 4960 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4961 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4962 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4963 } 4964 4965 /* whether or not to skip lapack calls */ 4966 skip_lapack = PETSC_TRUE; 4967 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4968 4969 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4970 if (!skip_lapack) { 4971 PetscScalar temp_work; 4972 4973 #if defined(PETSC_MISSING_LAPACK_GESVD) 4974 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4975 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4976 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4977 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4978 #if defined(PETSC_USE_COMPLEX) 4979 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4980 #endif 4981 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4982 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4983 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4984 lwork = -1; 4985 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4986 #if !defined(PETSC_USE_COMPLEX) 4987 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4988 #else 4989 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4990 #endif 4991 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4992 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4993 #else /* on missing GESVD */ 4994 /* SVD */ 4995 PetscInt max_n,min_n; 4996 max_n = max_size_of_constraint; 4997 min_n = max_constraints; 4998 if (max_size_of_constraint < max_constraints) { 4999 min_n = max_size_of_constraint; 5000 max_n = max_constraints; 5001 } 5002 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5003 #if defined(PETSC_USE_COMPLEX) 5004 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5005 #endif 5006 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5007 lwork = -1; 5008 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5009 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5010 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5011 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5012 #if !defined(PETSC_USE_COMPLEX) 5013 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5014 #else 5015 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5016 #endif 5017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5018 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5019 #endif /* on missing GESVD */ 5020 /* Allocate optimal workspace */ 5021 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5022 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5023 } 5024 /* Now we can loop on constraining sets */ 5025 total_counts = 0; 5026 constraints_idxs_ptr[0] = 0; 5027 constraints_data_ptr[0] = 0; 5028 /* vertices */ 5029 if (n_vertices) { 5030 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5031 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5032 for (i=0;i<n_vertices;i++) { 5033 constraints_n[total_counts] = 1; 5034 constraints_data[total_counts] = 1.0; 5035 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5036 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5037 total_counts++; 5038 } 5039 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5040 n_vertices = total_counts; 5041 } 5042 5043 /* edges and faces */ 5044 total_counts_cc = total_counts; 5045 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5046 IS used_is; 5047 PetscBool idxs_copied = PETSC_FALSE; 5048 5049 if (ncc<n_ISForEdges) { 5050 used_is = ISForEdges[ncc]; 5051 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5052 } else { 5053 used_is = ISForFaces[ncc-n_ISForEdges]; 5054 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5055 } 5056 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5057 5058 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5059 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5060 /* change of basis should not be performed on local periodic nodes */ 5061 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5062 if (nnsp_has_cnst) { 5063 PetscScalar quad_value; 5064 5065 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5066 idxs_copied = PETSC_TRUE; 5067 5068 if (!pcbddc->use_nnsp_true) { 5069 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5070 } else { 5071 quad_value = 1.0; 5072 } 5073 for (j=0;j<size_of_constraint;j++) { 5074 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5075 } 5076 temp_constraints++; 5077 total_counts++; 5078 } 5079 for (k=0;k<nnsp_size;k++) { 5080 PetscReal real_value; 5081 PetscScalar *ptr_to_data; 5082 5083 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5084 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5085 for (j=0;j<size_of_constraint;j++) { 5086 ptr_to_data[j] = array[is_indices[j]]; 5087 } 5088 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5089 /* check if array is null on the connected component */ 5090 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5091 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5092 if (real_value > 0.0) { /* keep indices and values */ 5093 temp_constraints++; 5094 total_counts++; 5095 if (!idxs_copied) { 5096 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5097 idxs_copied = PETSC_TRUE; 5098 } 5099 } 5100 } 5101 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5102 valid_constraints = temp_constraints; 5103 if (!pcbddc->use_nnsp_true && temp_constraints) { 5104 if (temp_constraints == 1) { /* just normalize the constraint */ 5105 PetscScalar norm,*ptr_to_data; 5106 5107 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5108 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5109 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5110 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5111 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5112 } else { /* perform SVD */ 5113 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5114 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5115 5116 #if defined(PETSC_MISSING_LAPACK_GESVD) 5117 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5118 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5119 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5120 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5121 from that computed using LAPACKgesvd 5122 -> This is due to a different computation of eigenvectors in LAPACKheev 5123 -> The quality of the POD-computed basis will be the same */ 5124 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5125 /* Store upper triangular part of correlation matrix */ 5126 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5127 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5128 for (j=0;j<temp_constraints;j++) { 5129 for (k=0;k<j+1;k++) { 5130 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5131 } 5132 } 5133 /* compute eigenvalues and eigenvectors of correlation matrix */ 5134 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5135 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5136 #if !defined(PETSC_USE_COMPLEX) 5137 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5138 #else 5139 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5140 #endif 5141 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5142 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5143 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5144 j = 0; 5145 while (j < temp_constraints && singular_vals[j] < tol) j++; 5146 total_counts = total_counts-j; 5147 valid_constraints = temp_constraints-j; 5148 /* scale and copy POD basis into used quadrature memory */ 5149 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5150 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5151 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5152 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5153 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5154 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5155 if (j<temp_constraints) { 5156 PetscInt ii; 5157 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5158 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5159 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5160 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5161 for (k=0;k<temp_constraints-j;k++) { 5162 for (ii=0;ii<size_of_constraint;ii++) { 5163 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5164 } 5165 } 5166 } 5167 #else /* on missing GESVD */ 5168 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5169 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5170 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5171 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5172 #if !defined(PETSC_USE_COMPLEX) 5173 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5174 #else 5175 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5176 #endif 5177 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5178 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5179 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5180 k = temp_constraints; 5181 if (k > size_of_constraint) k = size_of_constraint; 5182 j = 0; 5183 while (j < k && singular_vals[k-j-1] < tol) j++; 5184 valid_constraints = k-j; 5185 total_counts = total_counts-temp_constraints+valid_constraints; 5186 #endif /* on missing GESVD */ 5187 } 5188 } 5189 /* update pointers information */ 5190 if (valid_constraints) { 5191 constraints_n[total_counts_cc] = valid_constraints; 5192 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5193 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5194 /* set change_of_basis flag */ 5195 if (boolforchange) { 5196 PetscBTSet(change_basis,total_counts_cc); 5197 } 5198 total_counts_cc++; 5199 } 5200 } 5201 /* free workspace */ 5202 if (!skip_lapack) { 5203 ierr = PetscFree(work);CHKERRQ(ierr); 5204 #if defined(PETSC_USE_COMPLEX) 5205 ierr = PetscFree(rwork);CHKERRQ(ierr); 5206 #endif 5207 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5208 #if defined(PETSC_MISSING_LAPACK_GESVD) 5209 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5210 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5211 #endif 5212 } 5213 for (k=0;k<nnsp_size;k++) { 5214 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5215 } 5216 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5217 /* free index sets of faces, edges and vertices */ 5218 for (i=0;i<n_ISForFaces;i++) { 5219 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5220 } 5221 if (n_ISForFaces) { 5222 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5223 } 5224 for (i=0;i<n_ISForEdges;i++) { 5225 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5226 } 5227 if (n_ISForEdges) { 5228 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5229 } 5230 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5231 } else { 5232 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5233 5234 total_counts = 0; 5235 n_vertices = 0; 5236 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5237 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5238 } 5239 max_constraints = 0; 5240 total_counts_cc = 0; 5241 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5242 total_counts += pcbddc->adaptive_constraints_n[i]; 5243 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5244 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5245 } 5246 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5247 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5248 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5249 constraints_data = pcbddc->adaptive_constraints_data; 5250 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5251 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5252 total_counts_cc = 0; 5253 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5254 if (pcbddc->adaptive_constraints_n[i]) { 5255 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5256 } 5257 } 5258 #if 0 5259 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5260 for (i=0;i<total_counts_cc;i++) { 5261 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5262 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5263 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5264 printf(" %d",constraints_idxs[j]); 5265 } 5266 printf("\n"); 5267 printf("number of cc: %d\n",constraints_n[i]); 5268 } 5269 for (i=0;i<n_vertices;i++) { 5270 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5271 } 5272 for (i=0;i<sub_schurs->n_subs;i++) { 5273 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5274 } 5275 #endif 5276 5277 max_size_of_constraint = 0; 5278 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5279 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5280 /* Change of basis */ 5281 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5282 if (pcbddc->use_change_of_basis) { 5283 for (i=0;i<sub_schurs->n_subs;i++) { 5284 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5285 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5286 } 5287 } 5288 } 5289 } 5290 pcbddc->local_primal_size = total_counts; 5291 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5292 5293 /* map constraints_idxs in boundary numbering */ 5294 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5295 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5296 5297 /* Create constraint matrix */ 5298 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5299 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5300 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5301 5302 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5303 /* determine if a QR strategy is needed for change of basis */ 5304 qr_needed = PETSC_FALSE; 5305 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5306 total_primal_vertices=0; 5307 pcbddc->local_primal_size_cc = 0; 5308 for (i=0;i<total_counts_cc;i++) { 5309 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5310 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5311 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5312 pcbddc->local_primal_size_cc += 1; 5313 } else if (PetscBTLookup(change_basis,i)) { 5314 for (k=0;k<constraints_n[i];k++) { 5315 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5316 } 5317 pcbddc->local_primal_size_cc += constraints_n[i]; 5318 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5319 PetscBTSet(qr_needed_idx,i); 5320 qr_needed = PETSC_TRUE; 5321 } 5322 } else { 5323 pcbddc->local_primal_size_cc += 1; 5324 } 5325 } 5326 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5327 pcbddc->n_vertices = total_primal_vertices; 5328 /* permute indices in order to have a sorted set of vertices */ 5329 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5330 5331 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5332 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5333 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5334 5335 /* nonzero structure of constraint matrix */ 5336 /* and get reference dof for local constraints */ 5337 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5338 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5339 5340 j = total_primal_vertices; 5341 total_counts = total_primal_vertices; 5342 cum = total_primal_vertices; 5343 for (i=n_vertices;i<total_counts_cc;i++) { 5344 if (!PetscBTLookup(change_basis,i)) { 5345 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5346 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5347 cum++; 5348 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5349 for (k=0;k<constraints_n[i];k++) { 5350 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5351 nnz[j+k] = size_of_constraint; 5352 } 5353 j += constraints_n[i]; 5354 } 5355 } 5356 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5357 ierr = PetscFree(nnz);CHKERRQ(ierr); 5358 5359 /* set values in constraint matrix */ 5360 for (i=0;i<total_primal_vertices;i++) { 5361 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5362 } 5363 total_counts = total_primal_vertices; 5364 for (i=n_vertices;i<total_counts_cc;i++) { 5365 if (!PetscBTLookup(change_basis,i)) { 5366 PetscInt *cols; 5367 5368 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5369 cols = constraints_idxs+constraints_idxs_ptr[i]; 5370 for (k=0;k<constraints_n[i];k++) { 5371 PetscInt row = total_counts+k; 5372 PetscScalar *vals; 5373 5374 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5375 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5376 } 5377 total_counts += constraints_n[i]; 5378 } 5379 } 5380 /* assembling */ 5381 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5382 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5383 5384 /* 5385 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5386 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5387 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5388 */ 5389 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5390 if (pcbddc->use_change_of_basis) { 5391 /* dual and primal dofs on a single cc */ 5392 PetscInt dual_dofs,primal_dofs; 5393 /* working stuff for GEQRF */ 5394 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5395 PetscBLASInt lqr_work; 5396 /* working stuff for UNGQR */ 5397 PetscScalar *gqr_work,lgqr_work_t; 5398 PetscBLASInt lgqr_work; 5399 /* working stuff for TRTRS */ 5400 PetscScalar *trs_rhs; 5401 PetscBLASInt Blas_NRHS; 5402 /* pointers for values insertion into change of basis matrix */ 5403 PetscInt *start_rows,*start_cols; 5404 PetscScalar *start_vals; 5405 /* working stuff for values insertion */ 5406 PetscBT is_primal; 5407 PetscInt *aux_primal_numbering_B; 5408 /* matrix sizes */ 5409 PetscInt global_size,local_size; 5410 /* temporary change of basis */ 5411 Mat localChangeOfBasisMatrix; 5412 /* extra space for debugging */ 5413 PetscScalar *dbg_work; 5414 5415 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5416 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5417 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5418 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5419 /* nonzeros for local mat */ 5420 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5421 if (!pcbddc->benign_change || pcbddc->fake_change) { 5422 for (i=0;i<pcis->n;i++) nnz[i]=1; 5423 } else { 5424 const PetscInt *ii; 5425 PetscInt n; 5426 PetscBool flg_row; 5427 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5428 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5429 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5430 } 5431 for (i=n_vertices;i<total_counts_cc;i++) { 5432 if (PetscBTLookup(change_basis,i)) { 5433 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5434 if (PetscBTLookup(qr_needed_idx,i)) { 5435 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5436 } else { 5437 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5438 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5439 } 5440 } 5441 } 5442 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5443 ierr = PetscFree(nnz);CHKERRQ(ierr); 5444 /* Set interior change in the matrix */ 5445 if (!pcbddc->benign_change || pcbddc->fake_change) { 5446 for (i=0;i<pcis->n;i++) { 5447 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5448 } 5449 } else { 5450 const PetscInt *ii,*jj; 5451 PetscScalar *aa; 5452 PetscInt n; 5453 PetscBool flg_row; 5454 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5455 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5456 for (i=0;i<n;i++) { 5457 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5458 } 5459 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5460 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5461 } 5462 5463 if (pcbddc->dbg_flag) { 5464 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5465 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5466 } 5467 5468 5469 /* Now we loop on the constraints which need a change of basis */ 5470 /* 5471 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5472 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5473 5474 Basic blocks of change of basis matrix T computed by 5475 5476 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5477 5478 | 1 0 ... 0 s_1/S | 5479 | 0 1 ... 0 s_2/S | 5480 | ... | 5481 | 0 ... 1 s_{n-1}/S | 5482 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5483 5484 with S = \sum_{i=1}^n s_i^2 5485 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5486 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5487 5488 - QR decomposition of constraints otherwise 5489 */ 5490 if (qr_needed) { 5491 /* space to store Q */ 5492 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5493 /* array to store scaling factors for reflectors */ 5494 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5495 /* first we issue queries for optimal work */ 5496 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5497 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5498 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5499 lqr_work = -1; 5500 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5501 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5502 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5503 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5504 lgqr_work = -1; 5505 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5506 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5507 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5508 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5509 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5510 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5511 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5512 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5513 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5514 /* array to store rhs and solution of triangular solver */ 5515 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5516 /* allocating workspace for check */ 5517 if (pcbddc->dbg_flag) { 5518 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5519 } 5520 } 5521 /* array to store whether a node is primal or not */ 5522 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5523 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5524 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5525 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5526 for (i=0;i<total_primal_vertices;i++) { 5527 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5528 } 5529 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5530 5531 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5532 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5533 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5534 if (PetscBTLookup(change_basis,total_counts)) { 5535 /* get constraint info */ 5536 primal_dofs = constraints_n[total_counts]; 5537 dual_dofs = size_of_constraint-primal_dofs; 5538 5539 if (pcbddc->dbg_flag) { 5540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5541 } 5542 5543 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5544 5545 /* copy quadrature constraints for change of basis check */ 5546 if (pcbddc->dbg_flag) { 5547 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5548 } 5549 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5550 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5551 5552 /* compute QR decomposition of constraints */ 5553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5554 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5555 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5556 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5557 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5558 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5559 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5560 5561 /* explictly compute R^-T */ 5562 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5563 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5564 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5565 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5567 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5568 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5569 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5570 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5571 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5572 5573 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5574 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5575 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5576 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5577 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5578 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5579 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5580 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5581 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5582 5583 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5584 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5585 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5586 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5587 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5588 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5589 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5590 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5591 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5592 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5593 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5594 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5595 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5596 5597 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5598 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5599 /* insert cols for primal dofs */ 5600 for (j=0;j<primal_dofs;j++) { 5601 start_vals = &qr_basis[j*size_of_constraint]; 5602 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5603 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5604 } 5605 /* insert cols for dual dofs */ 5606 for (j=0,k=0;j<dual_dofs;k++) { 5607 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5608 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5609 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5610 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5611 j++; 5612 } 5613 } 5614 5615 /* check change of basis */ 5616 if (pcbddc->dbg_flag) { 5617 PetscInt ii,jj; 5618 PetscBool valid_qr=PETSC_TRUE; 5619 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5620 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5621 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5622 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5623 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5624 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5625 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5626 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5627 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5628 for (jj=0;jj<size_of_constraint;jj++) { 5629 for (ii=0;ii<primal_dofs;ii++) { 5630 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5631 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5632 } 5633 } 5634 if (!valid_qr) { 5635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5636 for (jj=0;jj<size_of_constraint;jj++) { 5637 for (ii=0;ii<primal_dofs;ii++) { 5638 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5639 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5640 } 5641 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5642 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5643 } 5644 } 5645 } 5646 } else { 5647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5648 } 5649 } 5650 } else { /* simple transformation block */ 5651 PetscInt row,col; 5652 PetscScalar val,norm; 5653 5654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5655 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5656 for (j=0;j<size_of_constraint;j++) { 5657 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5658 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5659 if (!PetscBTLookup(is_primal,row_B)) { 5660 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5661 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5662 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5663 } else { 5664 for (k=0;k<size_of_constraint;k++) { 5665 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5666 if (row != col) { 5667 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5668 } else { 5669 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5670 } 5671 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5672 } 5673 } 5674 } 5675 if (pcbddc->dbg_flag) { 5676 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5677 } 5678 } 5679 } else { 5680 if (pcbddc->dbg_flag) { 5681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5682 } 5683 } 5684 } 5685 5686 /* free workspace */ 5687 if (qr_needed) { 5688 if (pcbddc->dbg_flag) { 5689 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5690 } 5691 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5692 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5693 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5694 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5695 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5696 } 5697 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5698 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 5701 /* assembling of global change of variable */ 5702 if (!pcbddc->fake_change) { 5703 Mat tmat; 5704 PetscInt bs; 5705 5706 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5707 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5708 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5709 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5710 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5711 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5712 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5713 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5714 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5715 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5716 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5717 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5718 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5719 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5720 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5721 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5722 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5723 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5724 5725 /* check */ 5726 if (pcbddc->dbg_flag) { 5727 PetscReal error; 5728 Vec x,x_change; 5729 5730 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5731 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5732 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5733 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5734 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5735 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5736 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5737 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5738 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5739 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5740 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5741 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5742 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5743 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5744 ierr = VecDestroy(&x);CHKERRQ(ierr); 5745 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5746 } 5747 /* adapt sub_schurs computed (if any) */ 5748 if (pcbddc->use_deluxe_scaling) { 5749 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5750 5751 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5752 if (sub_schurs && sub_schurs->S_Ej_all) { 5753 Mat S_new,tmat; 5754 IS is_all_N,is_V_Sall = NULL; 5755 5756 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5757 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5758 if (pcbddc->deluxe_zerorows) { 5759 ISLocalToGlobalMapping NtoSall; 5760 IS is_V; 5761 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5762 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5763 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5764 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5765 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5766 } 5767 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5768 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5769 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5770 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5771 if (pcbddc->deluxe_zerorows) { 5772 const PetscScalar *array; 5773 const PetscInt *idxs_V,*idxs_all; 5774 PetscInt i,n_V; 5775 5776 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5777 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5778 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5779 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5780 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5781 for (i=0;i<n_V;i++) { 5782 PetscScalar val; 5783 PetscInt idx; 5784 5785 idx = idxs_V[i]; 5786 val = array[idxs_all[idxs_V[i]]]; 5787 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5788 } 5789 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5790 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5791 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5792 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5793 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5794 } 5795 sub_schurs->S_Ej_all = S_new; 5796 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5797 if (sub_schurs->sum_S_Ej_all) { 5798 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5799 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5800 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5801 if (pcbddc->deluxe_zerorows) { 5802 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5803 } 5804 sub_schurs->sum_S_Ej_all = S_new; 5805 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5806 } 5807 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5808 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5809 } 5810 /* destroy any change of basis context in sub_schurs */ 5811 if (sub_schurs && sub_schurs->change) { 5812 PetscInt i; 5813 5814 for (i=0;i<sub_schurs->n_subs;i++) { 5815 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5816 } 5817 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5818 } 5819 } 5820 if (pcbddc->switch_static) { /* need to save the local change */ 5821 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5822 } else { 5823 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5824 } 5825 /* determine if any process has changed the pressures locally */ 5826 pcbddc->change_interior = pcbddc->benign_have_null; 5827 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5828 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5829 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5830 pcbddc->use_qr_single = qr_needed; 5831 } 5832 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5833 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5834 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5835 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5836 } else { 5837 Mat benign_global = NULL; 5838 if (pcbddc->benign_have_null) { 5839 Mat tmat; 5840 5841 pcbddc->change_interior = PETSC_TRUE; 5842 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5843 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5844 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5845 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5846 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5847 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5848 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5849 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5850 if (pcbddc->benign_change) { 5851 Mat M; 5852 5853 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5854 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5855 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5856 ierr = MatDestroy(&M);CHKERRQ(ierr); 5857 } else { 5858 Mat eye; 5859 PetscScalar *array; 5860 5861 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5862 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5863 for (i=0;i<pcis->n;i++) { 5864 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5865 } 5866 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5867 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5868 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5869 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5870 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5871 } 5872 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5873 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5874 } 5875 if (pcbddc->user_ChangeOfBasisMatrix) { 5876 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5877 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5878 } else if (pcbddc->benign_have_null) { 5879 pcbddc->ChangeOfBasisMatrix = benign_global; 5880 } 5881 } 5882 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5883 IS is_global; 5884 const PetscInt *gidxs; 5885 5886 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5887 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5888 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5889 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5890 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5891 } 5892 } 5893 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5894 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5895 } 5896 5897 if (!pcbddc->fake_change) { 5898 /* add pressure dofs to set of primal nodes for numbering purposes */ 5899 for (i=0;i<pcbddc->benign_n;i++) { 5900 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5901 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5902 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5903 pcbddc->local_primal_size_cc++; 5904 pcbddc->local_primal_size++; 5905 } 5906 5907 /* check if a new primal space has been introduced (also take into account benign trick) */ 5908 pcbddc->new_primal_space_local = PETSC_TRUE; 5909 if (olocal_primal_size == pcbddc->local_primal_size) { 5910 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5911 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5912 if (!pcbddc->new_primal_space_local) { 5913 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5914 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5915 } 5916 } 5917 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5918 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5919 } 5920 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5921 5922 /* flush dbg viewer */ 5923 if (pcbddc->dbg_flag) { 5924 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5925 } 5926 5927 /* free workspace */ 5928 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5929 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5930 if (!pcbddc->adaptive_selection) { 5931 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5932 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5933 } else { 5934 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5935 pcbddc->adaptive_constraints_idxs_ptr, 5936 pcbddc->adaptive_constraints_data_ptr, 5937 pcbddc->adaptive_constraints_idxs, 5938 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5939 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5940 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5941 } 5942 PetscFunctionReturn(0); 5943 } 5944 5945 #undef __FUNCT__ 5946 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5947 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5948 { 5949 ISLocalToGlobalMapping map; 5950 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5951 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5952 PetscInt ierr,i,N; 5953 5954 PetscFunctionBegin; 5955 if (pcbddc->recompute_topography) { 5956 pcbddc->graphanalyzed = PETSC_FALSE; 5957 /* Reset previously computed graph */ 5958 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5959 /* Init local Graph struct */ 5960 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5961 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5962 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5963 5964 /* Check validity of the csr graph passed in by the user */ 5965 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5966 5967 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5968 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5969 PetscInt *xadj,*adjncy; 5970 PetscInt nvtxs; 5971 PetscBool flg_row=PETSC_FALSE; 5972 5973 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5974 if (flg_row) { 5975 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5976 pcbddc->computed_rowadj = PETSC_TRUE; 5977 } 5978 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5979 } 5980 if (pcbddc->dbg_flag) { 5981 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5982 } 5983 5984 /* Setup of Graph */ 5985 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5986 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5987 5988 /* attach info on disconnected subdomains if present */ 5989 if (pcbddc->n_local_subs) { 5990 PetscInt *local_subs; 5991 5992 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5993 for (i=0;i<pcbddc->n_local_subs;i++) { 5994 const PetscInt *idxs; 5995 PetscInt nl,j; 5996 5997 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5998 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5999 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6000 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6001 } 6002 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6003 pcbddc->mat_graph->local_subs = local_subs; 6004 } 6005 } 6006 6007 if (!pcbddc->graphanalyzed) { 6008 /* Graph's connected components analysis */ 6009 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6010 pcbddc->graphanalyzed = PETSC_TRUE; 6011 } 6012 PetscFunctionReturn(0); 6013 } 6014 6015 #undef __FUNCT__ 6016 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6017 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6018 { 6019 PetscInt i,j; 6020 PetscScalar *alphas; 6021 PetscErrorCode ierr; 6022 6023 PetscFunctionBegin; 6024 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6025 for (i=0;i<n;i++) { 6026 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6027 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6028 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6029 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6030 } 6031 ierr = PetscFree(alphas);CHKERRQ(ierr); 6032 PetscFunctionReturn(0); 6033 } 6034 6035 #undef __FUNCT__ 6036 #define __FUNCT__ "MatISGetSubassemblingPattern" 6037 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6038 { 6039 Mat A; 6040 PetscInt n_neighs,*neighs,*n_shared,**shared; 6041 PetscMPIInt size,rank,color; 6042 PetscInt *xadj,*adjncy; 6043 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6044 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 6045 PetscInt void_procs,*procs_candidates = NULL; 6046 PetscInt xadj_count, *count; 6047 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6048 PetscSubcomm psubcomm; 6049 MPI_Comm subcomm; 6050 PetscErrorCode ierr; 6051 6052 PetscFunctionBegin; 6053 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6054 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6055 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6056 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6057 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6058 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6059 6060 if (have_void) *have_void = PETSC_FALSE; 6061 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6062 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6063 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6064 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6065 im_active = !!(n); 6066 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6067 void_procs = size - active_procs; 6068 /* get ranks of of non-active processes in mat communicator */ 6069 if (void_procs) { 6070 PetscInt ncand; 6071 6072 if (have_void) *have_void = PETSC_TRUE; 6073 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6074 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6075 for (i=0,ncand=0;i<size;i++) { 6076 if (!procs_candidates[i]) { 6077 procs_candidates[ncand++] = i; 6078 } 6079 } 6080 /* force n_subdomains to be not greater that the number of non-active processes */ 6081 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6082 } 6083 6084 /* number of subdomains requested greater than active processes -> just shift the matrix 6085 number of subdomains requested 1 -> send to master or first candidate in voids */ 6086 if (active_procs < *n_subdomains || *n_subdomains == 1) { 6087 PetscInt issize,isidx,dest; 6088 if (*n_subdomains == 1) dest = 0; 6089 else dest = rank; 6090 if (im_active) { 6091 issize = 1; 6092 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6093 isidx = procs_candidates[dest]; 6094 } else { 6095 isidx = dest; 6096 } 6097 } else { 6098 issize = 0; 6099 isidx = -1; 6100 } 6101 *n_subdomains = active_procs; 6102 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6103 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6104 PetscFunctionReturn(0); 6105 } 6106 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6107 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6108 threshold = PetscMax(threshold,2); 6109 6110 /* Get info on mapping */ 6111 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6112 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6113 6114 /* build local CSR graph of subdomains' connectivity */ 6115 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6116 xadj[0] = 0; 6117 xadj[1] = PetscMax(n_neighs-1,0); 6118 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6119 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6120 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6121 for (i=1;i<n_neighs;i++) 6122 for (j=0;j<n_shared[i];j++) 6123 count[shared[i][j]] += 1; 6124 6125 xadj_count = 0; 6126 for (i=1;i<n_neighs;i++) { 6127 for (j=0;j<n_shared[i];j++) { 6128 if (count[shared[i][j]] < threshold) { 6129 adjncy[xadj_count] = neighs[i]; 6130 adjncy_wgt[xadj_count] = n_shared[i]; 6131 xadj_count++; 6132 break; 6133 } 6134 } 6135 } 6136 xadj[1] = xadj_count; 6137 ierr = PetscFree(count);CHKERRQ(ierr); 6138 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6139 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6140 6141 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6142 6143 /* Restrict work on active processes only */ 6144 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6145 if (void_procs) { 6146 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6147 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6148 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6149 subcomm = PetscSubcommChild(psubcomm); 6150 } else { 6151 psubcomm = NULL; 6152 subcomm = PetscObjectComm((PetscObject)mat); 6153 } 6154 6155 v_wgt = NULL; 6156 if (!color) { 6157 ierr = PetscFree(xadj);CHKERRQ(ierr); 6158 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6159 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6160 } else { 6161 Mat subdomain_adj; 6162 IS new_ranks,new_ranks_contig; 6163 MatPartitioning partitioner; 6164 PetscInt rstart=0,rend=0; 6165 PetscInt *is_indices,*oldranks; 6166 PetscMPIInt size; 6167 PetscBool aggregate; 6168 6169 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6170 if (void_procs) { 6171 PetscInt prank = rank; 6172 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6173 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6174 for (i=0;i<xadj[1];i++) { 6175 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6176 } 6177 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6178 } else { 6179 oldranks = NULL; 6180 } 6181 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6182 if (aggregate) { /* TODO: all this part could be made more efficient */ 6183 PetscInt lrows,row,ncols,*cols; 6184 PetscMPIInt nrank; 6185 PetscScalar *vals; 6186 6187 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6188 lrows = 0; 6189 if (nrank<redprocs) { 6190 lrows = size/redprocs; 6191 if (nrank<size%redprocs) lrows++; 6192 } 6193 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6194 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6195 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6196 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6197 row = nrank; 6198 ncols = xadj[1]-xadj[0]; 6199 cols = adjncy; 6200 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6201 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6202 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6203 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6204 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6205 ierr = PetscFree(xadj);CHKERRQ(ierr); 6206 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6207 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6208 ierr = PetscFree(vals);CHKERRQ(ierr); 6209 if (use_vwgt) { 6210 Vec v; 6211 const PetscScalar *array; 6212 PetscInt nl; 6213 6214 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6215 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6216 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6217 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6218 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6219 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6220 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6221 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6222 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6223 ierr = VecDestroy(&v);CHKERRQ(ierr); 6224 } 6225 } else { 6226 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6227 if (use_vwgt) { 6228 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6229 v_wgt[0] = local_size; 6230 } 6231 } 6232 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6233 6234 /* Partition */ 6235 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6236 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6237 if (v_wgt) { 6238 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6239 } 6240 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6241 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6242 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6243 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6244 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6245 6246 /* renumber new_ranks to avoid "holes" in new set of processors */ 6247 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6248 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6249 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6250 if (!aggregate) { 6251 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6252 #if defined(PETSC_USE_DEBUG) 6253 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6254 #endif 6255 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6256 } else if (oldranks) { 6257 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6258 } else { 6259 ranks_send_to_idx[0] = is_indices[0]; 6260 } 6261 } else { 6262 PetscInt idxs[1]; 6263 PetscMPIInt tag; 6264 MPI_Request *reqs; 6265 6266 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6267 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6268 for (i=rstart;i<rend;i++) { 6269 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6270 } 6271 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6272 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6273 ierr = PetscFree(reqs);CHKERRQ(ierr); 6274 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6275 #if defined(PETSC_USE_DEBUG) 6276 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6277 #endif 6278 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6279 } else if (oldranks) { 6280 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6281 } else { 6282 ranks_send_to_idx[0] = idxs[0]; 6283 } 6284 } 6285 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6286 /* clean up */ 6287 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6288 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6289 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6290 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6291 } 6292 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6293 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6294 6295 /* assemble parallel IS for sends */ 6296 i = 1; 6297 if (!color) i=0; 6298 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6299 PetscFunctionReturn(0); 6300 } 6301 6302 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6303 6304 #undef __FUNCT__ 6305 #define __FUNCT__ "MatISSubassemble" 6306 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6307 { 6308 Mat local_mat; 6309 IS is_sends_internal; 6310 PetscInt rows,cols,new_local_rows; 6311 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6312 PetscBool ismatis,isdense,newisdense,destroy_mat; 6313 ISLocalToGlobalMapping l2gmap; 6314 PetscInt* l2gmap_indices; 6315 const PetscInt* is_indices; 6316 MatType new_local_type; 6317 /* buffers */ 6318 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6319 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6320 PetscInt *recv_buffer_idxs_local; 6321 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6322 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6323 /* MPI */ 6324 MPI_Comm comm,comm_n; 6325 PetscSubcomm subcomm; 6326 PetscMPIInt n_sends,n_recvs,commsize; 6327 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6328 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6329 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6330 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6331 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6332 PetscErrorCode ierr; 6333 6334 PetscFunctionBegin; 6335 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6336 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6337 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6338 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6339 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6340 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6341 PetscValidLogicalCollectiveBool(mat,reuse,6); 6342 PetscValidLogicalCollectiveInt(mat,nis,8); 6343 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6344 if (nvecs) { 6345 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6346 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6347 } 6348 /* further checks */ 6349 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6350 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6351 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6352 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6353 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6354 if (reuse && *mat_n) { 6355 PetscInt mrows,mcols,mnrows,mncols; 6356 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6357 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6358 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6359 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6360 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6361 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6362 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6363 } 6364 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6365 PetscValidLogicalCollectiveInt(mat,bs,0); 6366 6367 /* prepare IS for sending if not provided */ 6368 if (!is_sends) { 6369 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6370 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6371 } else { 6372 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6373 is_sends_internal = is_sends; 6374 } 6375 6376 /* get comm */ 6377 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6378 6379 /* compute number of sends */ 6380 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6381 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6382 6383 /* compute number of receives */ 6384 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6385 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6386 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6387 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6388 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6389 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6390 ierr = PetscFree(iflags);CHKERRQ(ierr); 6391 6392 /* restrict comm if requested */ 6393 subcomm = 0; 6394 destroy_mat = PETSC_FALSE; 6395 if (restrict_comm) { 6396 PetscMPIInt color,subcommsize; 6397 6398 color = 0; 6399 if (restrict_full) { 6400 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6401 } else { 6402 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6403 } 6404 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6405 subcommsize = commsize - subcommsize; 6406 /* check if reuse has been requested */ 6407 if (reuse) { 6408 if (*mat_n) { 6409 PetscMPIInt subcommsize2; 6410 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6411 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6412 comm_n = PetscObjectComm((PetscObject)*mat_n); 6413 } else { 6414 comm_n = PETSC_COMM_SELF; 6415 } 6416 } else { /* MAT_INITIAL_MATRIX */ 6417 PetscMPIInt rank; 6418 6419 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6420 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6421 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6422 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6423 comm_n = PetscSubcommChild(subcomm); 6424 } 6425 /* flag to destroy *mat_n if not significative */ 6426 if (color) destroy_mat = PETSC_TRUE; 6427 } else { 6428 comm_n = comm; 6429 } 6430 6431 /* prepare send/receive buffers */ 6432 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6433 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6434 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6435 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6436 if (nis) { 6437 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6438 } 6439 6440 /* Get data from local matrices */ 6441 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6442 /* TODO: See below some guidelines on how to prepare the local buffers */ 6443 /* 6444 send_buffer_vals should contain the raw values of the local matrix 6445 send_buffer_idxs should contain: 6446 - MatType_PRIVATE type 6447 - PetscInt size_of_l2gmap 6448 - PetscInt global_row_indices[size_of_l2gmap] 6449 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6450 */ 6451 else { 6452 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6453 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6454 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6455 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6456 send_buffer_idxs[1] = i; 6457 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6458 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6459 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6460 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6461 for (i=0;i<n_sends;i++) { 6462 ilengths_vals[is_indices[i]] = len*len; 6463 ilengths_idxs[is_indices[i]] = len+2; 6464 } 6465 } 6466 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6467 /* additional is (if any) */ 6468 if (nis) { 6469 PetscMPIInt psum; 6470 PetscInt j; 6471 for (j=0,psum=0;j<nis;j++) { 6472 PetscInt plen; 6473 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6474 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6475 psum += len+1; /* indices + lenght */ 6476 } 6477 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6478 for (j=0,psum=0;j<nis;j++) { 6479 PetscInt plen; 6480 const PetscInt *is_array_idxs; 6481 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6482 send_buffer_idxs_is[psum] = plen; 6483 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6484 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6485 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6486 psum += plen+1; /* indices + lenght */ 6487 } 6488 for (i=0;i<n_sends;i++) { 6489 ilengths_idxs_is[is_indices[i]] = psum; 6490 } 6491 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6492 } 6493 6494 buf_size_idxs = 0; 6495 buf_size_vals = 0; 6496 buf_size_idxs_is = 0; 6497 buf_size_vecs = 0; 6498 for (i=0;i<n_recvs;i++) { 6499 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6500 buf_size_vals += (PetscInt)olengths_vals[i]; 6501 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6502 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6503 } 6504 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6505 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6506 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6507 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6508 6509 /* get new tags for clean communications */ 6510 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6511 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6512 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6513 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6514 6515 /* allocate for requests */ 6516 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6517 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6518 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6519 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6520 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6521 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6522 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6523 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6524 6525 /* communications */ 6526 ptr_idxs = recv_buffer_idxs; 6527 ptr_vals = recv_buffer_vals; 6528 ptr_idxs_is = recv_buffer_idxs_is; 6529 ptr_vecs = recv_buffer_vecs; 6530 for (i=0;i<n_recvs;i++) { 6531 source_dest = onodes[i]; 6532 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6533 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6534 ptr_idxs += olengths_idxs[i]; 6535 ptr_vals += olengths_vals[i]; 6536 if (nis) { 6537 source_dest = onodes_is[i]; 6538 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6539 ptr_idxs_is += olengths_idxs_is[i]; 6540 } 6541 if (nvecs) { 6542 source_dest = onodes[i]; 6543 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6544 ptr_vecs += olengths_idxs[i]-2; 6545 } 6546 } 6547 for (i=0;i<n_sends;i++) { 6548 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6549 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6550 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6551 if (nis) { 6552 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6553 } 6554 if (nvecs) { 6555 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6556 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6557 } 6558 } 6559 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6560 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6561 6562 /* assemble new l2g map */ 6563 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6564 ptr_idxs = recv_buffer_idxs; 6565 new_local_rows = 0; 6566 for (i=0;i<n_recvs;i++) { 6567 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6568 ptr_idxs += olengths_idxs[i]; 6569 } 6570 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6571 ptr_idxs = recv_buffer_idxs; 6572 new_local_rows = 0; 6573 for (i=0;i<n_recvs;i++) { 6574 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6575 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6576 ptr_idxs += olengths_idxs[i]; 6577 } 6578 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6579 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6580 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6581 6582 /* infer new local matrix type from received local matrices type */ 6583 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6584 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6585 if (n_recvs) { 6586 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6587 ptr_idxs = recv_buffer_idxs; 6588 for (i=0;i<n_recvs;i++) { 6589 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6590 new_local_type_private = MATAIJ_PRIVATE; 6591 break; 6592 } 6593 ptr_idxs += olengths_idxs[i]; 6594 } 6595 switch (new_local_type_private) { 6596 case MATDENSE_PRIVATE: 6597 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6598 new_local_type = MATSEQAIJ; 6599 bs = 1; 6600 } else { /* if I receive only 1 dense matrix */ 6601 new_local_type = MATSEQDENSE; 6602 bs = 1; 6603 } 6604 break; 6605 case MATAIJ_PRIVATE: 6606 new_local_type = MATSEQAIJ; 6607 bs = 1; 6608 break; 6609 case MATBAIJ_PRIVATE: 6610 new_local_type = MATSEQBAIJ; 6611 break; 6612 case MATSBAIJ_PRIVATE: 6613 new_local_type = MATSEQSBAIJ; 6614 break; 6615 default: 6616 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6617 break; 6618 } 6619 } else { /* by default, new_local_type is seqdense */ 6620 new_local_type = MATSEQDENSE; 6621 bs = 1; 6622 } 6623 6624 /* create MATIS object if needed */ 6625 if (!reuse) { 6626 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6627 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6628 } else { 6629 /* it also destroys the local matrices */ 6630 if (*mat_n) { 6631 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6632 } else { /* this is a fake object */ 6633 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6634 } 6635 } 6636 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6637 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6638 6639 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6640 6641 /* Global to local map of received indices */ 6642 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6643 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6644 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6645 6646 /* restore attributes -> type of incoming data and its size */ 6647 buf_size_idxs = 0; 6648 for (i=0;i<n_recvs;i++) { 6649 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6650 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6651 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6652 } 6653 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6654 6655 /* set preallocation */ 6656 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6657 if (!newisdense) { 6658 PetscInt *new_local_nnz=0; 6659 6660 ptr_idxs = recv_buffer_idxs_local; 6661 if (n_recvs) { 6662 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6663 } 6664 for (i=0;i<n_recvs;i++) { 6665 PetscInt j; 6666 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6667 for (j=0;j<*(ptr_idxs+1);j++) { 6668 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6669 } 6670 } else { 6671 /* TODO */ 6672 } 6673 ptr_idxs += olengths_idxs[i]; 6674 } 6675 if (new_local_nnz) { 6676 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6677 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6678 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6679 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6680 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6681 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6682 } else { 6683 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6684 } 6685 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6686 } else { 6687 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6688 } 6689 6690 /* set values */ 6691 ptr_vals = recv_buffer_vals; 6692 ptr_idxs = recv_buffer_idxs_local; 6693 for (i=0;i<n_recvs;i++) { 6694 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6695 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6696 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6697 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6698 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6699 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6700 } else { 6701 /* TODO */ 6702 } 6703 ptr_idxs += olengths_idxs[i]; 6704 ptr_vals += olengths_vals[i]; 6705 } 6706 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6707 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6708 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6709 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6710 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6711 6712 #if 0 6713 if (!restrict_comm) { /* check */ 6714 Vec lvec,rvec; 6715 PetscReal infty_error; 6716 6717 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6718 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6719 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6720 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6721 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6722 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6723 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6724 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6725 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6726 } 6727 #endif 6728 6729 /* assemble new additional is (if any) */ 6730 if (nis) { 6731 PetscInt **temp_idxs,*count_is,j,psum; 6732 6733 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6734 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6735 ptr_idxs = recv_buffer_idxs_is; 6736 psum = 0; 6737 for (i=0;i<n_recvs;i++) { 6738 for (j=0;j<nis;j++) { 6739 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6740 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6741 psum += plen; 6742 ptr_idxs += plen+1; /* shift pointer to received data */ 6743 } 6744 } 6745 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6746 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6747 for (i=1;i<nis;i++) { 6748 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6749 } 6750 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6751 ptr_idxs = recv_buffer_idxs_is; 6752 for (i=0;i<n_recvs;i++) { 6753 for (j=0;j<nis;j++) { 6754 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6755 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6756 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6757 ptr_idxs += plen+1; /* shift pointer to received data */ 6758 } 6759 } 6760 for (i=0;i<nis;i++) { 6761 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6762 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6763 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6764 } 6765 ierr = PetscFree(count_is);CHKERRQ(ierr); 6766 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6767 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6768 } 6769 /* free workspace */ 6770 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6771 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6772 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6773 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6774 if (isdense) { 6775 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6776 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6777 } else { 6778 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6779 } 6780 if (nis) { 6781 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6782 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6783 } 6784 6785 if (nvecs) { 6786 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6787 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6788 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6789 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6790 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6791 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6792 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6793 /* set values */ 6794 ptr_vals = recv_buffer_vecs; 6795 ptr_idxs = recv_buffer_idxs_local; 6796 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6797 for (i=0;i<n_recvs;i++) { 6798 PetscInt j; 6799 for (j=0;j<*(ptr_idxs+1);j++) { 6800 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6801 } 6802 ptr_idxs += olengths_idxs[i]; 6803 ptr_vals += olengths_idxs[i]-2; 6804 } 6805 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6806 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6807 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6808 } 6809 6810 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6811 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6812 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6813 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6814 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6815 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6816 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6817 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6818 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6819 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6820 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6821 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6822 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6823 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6824 ierr = PetscFree(onodes);CHKERRQ(ierr); 6825 if (nis) { 6826 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6827 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6828 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6829 } 6830 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6831 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6832 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6833 for (i=0;i<nis;i++) { 6834 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6835 } 6836 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6837 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6838 } 6839 *mat_n = NULL; 6840 } 6841 PetscFunctionReturn(0); 6842 } 6843 6844 /* temporary hack into ksp private data structure */ 6845 #include <petsc/private/kspimpl.h> 6846 6847 #undef __FUNCT__ 6848 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6849 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6850 { 6851 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6852 PC_IS *pcis = (PC_IS*)pc->data; 6853 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6854 Mat coarsedivudotp = NULL; 6855 MatNullSpace CoarseNullSpace = NULL; 6856 ISLocalToGlobalMapping coarse_islg; 6857 IS coarse_is,*isarray; 6858 PetscInt i,im_active=-1,active_procs=-1; 6859 PetscInt nis,nisdofs,nisneu,nisvert; 6860 PC pc_temp; 6861 PCType coarse_pc_type; 6862 KSPType coarse_ksp_type; 6863 PetscBool multilevel_requested,multilevel_allowed; 6864 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6865 Mat t_coarse_mat_is; 6866 PetscInt ncoarse; 6867 PetscBool compute_vecs = PETSC_FALSE; 6868 PetscScalar *array; 6869 MatReuse coarse_mat_reuse; 6870 PetscBool restr, full_restr, have_void; 6871 PetscErrorCode ierr; 6872 6873 PetscFunctionBegin; 6874 /* Assign global numbering to coarse dofs */ 6875 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6876 PetscInt ocoarse_size; 6877 compute_vecs = PETSC_TRUE; 6878 ocoarse_size = pcbddc->coarse_size; 6879 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6880 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6881 /* see if we can avoid some work */ 6882 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6883 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6884 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6885 PC pc; 6886 PetscBool isbddc; 6887 6888 /* temporary workaround since PCBDDC does not have a reset method so far */ 6889 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6890 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6891 if (isbddc) { 6892 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6893 } else { 6894 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6895 } 6896 coarse_reuse = PETSC_FALSE; 6897 } else { /* we can safely reuse already computed coarse matrix */ 6898 coarse_reuse = PETSC_TRUE; 6899 } 6900 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6901 coarse_reuse = PETSC_FALSE; 6902 } 6903 /* reset any subassembling information */ 6904 if (!coarse_reuse || pcbddc->recompute_topography) { 6905 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6906 } 6907 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6908 coarse_reuse = PETSC_TRUE; 6909 } 6910 /* assemble coarse matrix */ 6911 if (coarse_reuse && pcbddc->coarse_ksp) { 6912 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6913 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6914 coarse_mat_reuse = MAT_REUSE_MATRIX; 6915 } else { 6916 coarse_mat = NULL; 6917 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6918 } 6919 6920 /* creates temporary l2gmap and IS for coarse indexes */ 6921 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6922 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6923 6924 /* creates temporary MATIS object for coarse matrix */ 6925 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6926 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6927 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6928 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6929 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6930 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6931 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6932 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6933 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6934 6935 /* count "active" (i.e. with positive local size) and "void" processes */ 6936 im_active = !!(pcis->n); 6937 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6938 6939 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6940 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6941 /* full_restr : just use the receivers from the subassembling pattern */ 6942 coarse_mat_is = NULL; 6943 multilevel_allowed = PETSC_FALSE; 6944 multilevel_requested = PETSC_FALSE; 6945 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6946 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6947 if (multilevel_requested) { 6948 ncoarse = active_procs/pcbddc->coarsening_ratio; 6949 restr = PETSC_FALSE; 6950 full_restr = PETSC_FALSE; 6951 } else { 6952 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6953 restr = PETSC_TRUE; 6954 full_restr = PETSC_TRUE; 6955 } 6956 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6957 ncoarse = PetscMax(1,ncoarse); 6958 if (!pcbddc->coarse_subassembling) { 6959 if (pcbddc->coarsening_ratio > 1) { 6960 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6961 } else { 6962 PetscMPIInt size,rank; 6963 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6964 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6965 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6966 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6967 } 6968 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6969 PetscInt psum; 6970 PetscMPIInt size; 6971 if (pcbddc->coarse_ksp) psum = 1; 6972 else psum = 0; 6973 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6974 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6975 if (ncoarse < size) have_void = PETSC_TRUE; 6976 } 6977 /* determine if we can go multilevel */ 6978 if (multilevel_requested) { 6979 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6980 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6981 } 6982 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6983 6984 /* dump subassembling pattern */ 6985 if (pcbddc->dbg_flag && multilevel_allowed) { 6986 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6987 } 6988 6989 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6990 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6991 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6992 const PetscInt *idxs; 6993 ISLocalToGlobalMapping tmap; 6994 6995 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6996 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6997 /* allocate space for temporary storage */ 6998 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6999 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7000 /* allocate for IS array */ 7001 nisdofs = pcbddc->n_ISForDofsLocal; 7002 nisneu = !!pcbddc->NeumannBoundariesLocal; 7003 nisvert = 0; /* nisvert is not used */ 7004 nis = nisdofs + nisneu + nisvert; 7005 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7006 /* dofs splitting */ 7007 for (i=0;i<nisdofs;i++) { 7008 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7009 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7010 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7011 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7012 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7013 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7014 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7015 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7016 } 7017 /* neumann boundaries */ 7018 if (pcbddc->NeumannBoundariesLocal) { 7019 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7020 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7021 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7022 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7023 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7024 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7025 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7026 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7027 } 7028 /* free memory */ 7029 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7030 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7031 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7032 } else { 7033 nis = 0; 7034 nisdofs = 0; 7035 nisneu = 0; 7036 nisvert = 0; 7037 isarray = NULL; 7038 } 7039 /* destroy no longer needed map */ 7040 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7041 7042 /* subassemble */ 7043 if (multilevel_allowed) { 7044 Vec vp[1]; 7045 PetscInt nvecs = 0; 7046 PetscBool reuse,reuser; 7047 7048 if (coarse_mat) reuse = PETSC_TRUE; 7049 else reuse = PETSC_FALSE; 7050 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7051 vp[0] = NULL; 7052 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7053 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7054 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7055 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7056 nvecs = 1; 7057 7058 if (pcbddc->divudotp) { 7059 Mat B,loc_divudotp; 7060 Vec v,p; 7061 IS dummy; 7062 PetscInt np; 7063 7064 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7065 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7066 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7067 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7068 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7069 ierr = VecSet(p,1.);CHKERRQ(ierr); 7070 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7071 ierr = VecDestroy(&p);CHKERRQ(ierr); 7072 ierr = MatDestroy(&B);CHKERRQ(ierr); 7073 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7074 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7075 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7076 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7077 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7078 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7079 ierr = VecDestroy(&v);CHKERRQ(ierr); 7080 } 7081 } 7082 if (reuser) { 7083 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7084 } else { 7085 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7086 } 7087 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7088 PetscScalar *arraym,*arrayv; 7089 PetscInt nl; 7090 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7091 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7092 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7093 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7094 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7095 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7096 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7097 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7098 } else { 7099 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7100 } 7101 } else { 7102 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 7103 } 7104 if (coarse_mat_is || coarse_mat) { 7105 PetscMPIInt size; 7106 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7107 if (!multilevel_allowed) { 7108 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7109 } else { 7110 Mat A; 7111 7112 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7113 if (coarse_mat_is) { 7114 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7115 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7116 coarse_mat = coarse_mat_is; 7117 } 7118 /* be sure we don't have MatSeqDENSE as local mat */ 7119 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7120 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7121 } 7122 } 7123 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7124 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7125 7126 /* create local to global scatters for coarse problem */ 7127 if (compute_vecs) { 7128 PetscInt lrows; 7129 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7130 if (coarse_mat) { 7131 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7132 } else { 7133 lrows = 0; 7134 } 7135 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7136 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7137 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7138 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7139 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7140 } 7141 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7142 7143 /* set defaults for coarse KSP and PC */ 7144 if (multilevel_allowed) { 7145 coarse_ksp_type = KSPRICHARDSON; 7146 coarse_pc_type = PCBDDC; 7147 } else { 7148 coarse_ksp_type = KSPPREONLY; 7149 coarse_pc_type = PCREDUNDANT; 7150 } 7151 7152 /* print some info if requested */ 7153 if (pcbddc->dbg_flag) { 7154 if (!multilevel_allowed) { 7155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7156 if (multilevel_requested) { 7157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7158 } else if (pcbddc->max_levels) { 7159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7160 } 7161 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7162 } 7163 } 7164 7165 /* create the coarse KSP object only once with defaults */ 7166 if (coarse_mat) { 7167 PetscViewer dbg_viewer = NULL; 7168 if (pcbddc->dbg_flag) { 7169 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7170 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7171 } 7172 if (!pcbddc->coarse_ksp) { 7173 char prefix[256],str_level[16]; 7174 size_t len; 7175 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7176 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7177 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7178 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7179 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7180 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7181 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7182 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7183 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7184 /* prefix */ 7185 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7186 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7187 if (!pcbddc->current_level) { 7188 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7189 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7190 } else { 7191 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7192 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7193 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7194 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7195 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7196 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7197 } 7198 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7199 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7200 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7201 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7202 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7203 /* allow user customization */ 7204 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7205 } 7206 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7207 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7208 if (nisdofs) { 7209 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7210 for (i=0;i<nisdofs;i++) { 7211 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7212 } 7213 } 7214 if (nisneu) { 7215 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7216 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7217 } 7218 if (nisvert) { 7219 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7220 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7221 } 7222 7223 /* get some info after set from options */ 7224 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7225 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7226 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7227 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7228 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7229 isbddc = PETSC_FALSE; 7230 } 7231 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7232 if (isredundant) { 7233 KSP inner_ksp; 7234 PC inner_pc; 7235 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7236 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7237 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7238 } 7239 7240 /* parameters which miss an API */ 7241 if (isbddc) { 7242 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7243 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7244 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7245 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7246 if (pcbddc_coarse->benign_saddle_point) { 7247 Mat coarsedivudotp_is; 7248 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7249 IS row,col; 7250 const PetscInt *gidxs; 7251 PetscInt n,st,M,N; 7252 7253 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7254 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7255 st = st-n; 7256 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7257 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7258 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7259 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7260 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7261 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7262 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7263 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7264 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7265 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7266 ierr = ISDestroy(&row);CHKERRQ(ierr); 7267 ierr = ISDestroy(&col);CHKERRQ(ierr); 7268 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7269 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7270 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7271 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7272 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7273 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7274 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7275 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7276 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7277 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7278 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7279 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7280 } 7281 } 7282 7283 /* propagate symmetry info of coarse matrix */ 7284 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7285 if (pc->pmat->symmetric_set) { 7286 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7287 } 7288 if (pc->pmat->hermitian_set) { 7289 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7290 } 7291 if (pc->pmat->spd_set) { 7292 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7293 } 7294 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7295 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7296 } 7297 /* set operators */ 7298 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7299 if (pcbddc->dbg_flag) { 7300 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7301 } 7302 } 7303 ierr = PetscFree(isarray);CHKERRQ(ierr); 7304 #if 0 7305 { 7306 PetscViewer viewer; 7307 char filename[256]; 7308 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7309 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7310 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7311 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7312 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7313 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7314 } 7315 #endif 7316 7317 if (pcbddc->coarse_ksp) { 7318 Vec crhs,csol; 7319 7320 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7321 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7322 if (!csol) { 7323 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7324 } 7325 if (!crhs) { 7326 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7327 } 7328 } 7329 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7330 7331 /* compute null space for coarse solver if the benign trick has been requested */ 7332 if (pcbddc->benign_null) { 7333 7334 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7335 for (i=0;i<pcbddc->benign_n;i++) { 7336 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7337 } 7338 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7339 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7340 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7341 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7342 if (coarse_mat) { 7343 Vec nullv; 7344 PetscScalar *array,*array2; 7345 PetscInt nl; 7346 7347 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7348 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7349 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7350 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7351 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7352 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7353 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7354 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7355 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7356 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7357 } 7358 } 7359 7360 if (pcbddc->coarse_ksp) { 7361 PetscBool ispreonly; 7362 7363 if (CoarseNullSpace) { 7364 PetscBool isnull; 7365 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7366 if (isnull) { 7367 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7368 } 7369 /* TODO: add local nullspaces (if any) */ 7370 } 7371 /* setup coarse ksp */ 7372 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7373 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7374 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7375 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7376 KSP check_ksp; 7377 KSPType check_ksp_type; 7378 PC check_pc; 7379 Vec check_vec,coarse_vec; 7380 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7381 PetscInt its; 7382 PetscBool compute_eigs; 7383 PetscReal *eigs_r,*eigs_c; 7384 PetscInt neigs; 7385 const char *prefix; 7386 7387 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7388 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7389 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7390 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7391 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7392 /* prevent from setup unneeded object */ 7393 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7394 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7395 if (ispreonly) { 7396 check_ksp_type = KSPPREONLY; 7397 compute_eigs = PETSC_FALSE; 7398 } else { 7399 check_ksp_type = KSPGMRES; 7400 compute_eigs = PETSC_TRUE; 7401 } 7402 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7403 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7404 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7405 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7406 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7407 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7408 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7409 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7410 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7411 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7412 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7413 /* create random vec */ 7414 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7415 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7416 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7417 /* solve coarse problem */ 7418 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7419 /* set eigenvalue estimation if preonly has not been requested */ 7420 if (compute_eigs) { 7421 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7422 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7423 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7424 if (neigs) { 7425 lambda_max = eigs_r[neigs-1]; 7426 lambda_min = eigs_r[0]; 7427 if (pcbddc->use_coarse_estimates) { 7428 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7429 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7430 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7431 } 7432 } 7433 } 7434 } 7435 7436 /* check coarse problem residual error */ 7437 if (pcbddc->dbg_flag) { 7438 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7439 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7440 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7441 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7442 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7443 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7444 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7445 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7446 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7447 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7448 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7449 if (CoarseNullSpace) { 7450 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7451 } 7452 if (compute_eigs) { 7453 PetscReal lambda_max_s,lambda_min_s; 7454 KSPConvergedReason reason; 7455 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7456 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7457 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7458 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7459 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7460 for (i=0;i<neigs;i++) { 7461 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7462 } 7463 } 7464 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7465 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7466 } 7467 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7468 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7469 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7470 if (compute_eigs) { 7471 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7472 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7473 } 7474 } 7475 } 7476 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7477 /* print additional info */ 7478 if (pcbddc->dbg_flag) { 7479 /* waits until all processes reaches this point */ 7480 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7482 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7483 } 7484 7485 /* free memory */ 7486 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7487 PetscFunctionReturn(0); 7488 } 7489 7490 #undef __FUNCT__ 7491 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7492 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7493 { 7494 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7495 PC_IS* pcis = (PC_IS*)pc->data; 7496 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7497 IS subset,subset_mult,subset_n; 7498 PetscInt local_size,coarse_size=0; 7499 PetscInt *local_primal_indices=NULL; 7500 const PetscInt *t_local_primal_indices; 7501 PetscErrorCode ierr; 7502 7503 PetscFunctionBegin; 7504 /* Compute global number of coarse dofs */ 7505 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7506 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7507 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7508 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7509 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7510 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7511 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7512 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7513 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7514 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7515 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7516 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7517 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7518 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7519 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7520 7521 /* check numbering */ 7522 if (pcbddc->dbg_flag) { 7523 PetscScalar coarsesum,*array,*array2; 7524 PetscInt i; 7525 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7526 7527 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7528 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7529 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7530 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7531 /* counter */ 7532 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7533 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7534 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7535 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7536 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7537 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7538 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7539 for (i=0;i<pcbddc->local_primal_size;i++) { 7540 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7541 } 7542 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7543 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7544 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7545 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7546 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7547 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7548 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7549 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7550 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7551 for (i=0;i<pcis->n;i++) { 7552 if (array[i] != 0.0 && array[i] != array2[i]) { 7553 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7554 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7555 set_error = PETSC_TRUE; 7556 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7557 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7558 } 7559 } 7560 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7561 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7562 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7563 for (i=0;i<pcis->n;i++) { 7564 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7565 } 7566 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7567 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7568 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7569 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7570 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7571 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7572 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7573 PetscInt *gidxs; 7574 7575 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7576 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7577 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7578 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7579 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7580 for (i=0;i<pcbddc->local_primal_size;i++) { 7581 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7582 } 7583 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7584 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7585 } 7586 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7587 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7588 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7589 } 7590 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7591 /* get back data */ 7592 *coarse_size_n = coarse_size; 7593 *local_primal_indices_n = local_primal_indices; 7594 PetscFunctionReturn(0); 7595 } 7596 7597 #undef __FUNCT__ 7598 #define __FUNCT__ "PCBDDCGlobalToLocal" 7599 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7600 { 7601 IS localis_t; 7602 PetscInt i,lsize,*idxs,n; 7603 PetscScalar *vals; 7604 PetscErrorCode ierr; 7605 7606 PetscFunctionBegin; 7607 /* get indices in local ordering exploiting local to global map */ 7608 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7609 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7610 for (i=0;i<lsize;i++) vals[i] = 1.0; 7611 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7612 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7613 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7614 if (idxs) { /* multilevel guard */ 7615 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7616 } 7617 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7618 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7619 ierr = PetscFree(vals);CHKERRQ(ierr); 7620 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7621 /* now compute set in local ordering */ 7622 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7623 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7624 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7625 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7626 for (i=0,lsize=0;i<n;i++) { 7627 if (PetscRealPart(vals[i]) > 0.5) { 7628 lsize++; 7629 } 7630 } 7631 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7632 for (i=0,lsize=0;i<n;i++) { 7633 if (PetscRealPart(vals[i]) > 0.5) { 7634 idxs[lsize++] = i; 7635 } 7636 } 7637 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7638 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7639 *localis = localis_t; 7640 PetscFunctionReturn(0); 7641 } 7642 7643 #undef __FUNCT__ 7644 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7645 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7646 { 7647 PC_IS *pcis=(PC_IS*)pc->data; 7648 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7649 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7650 Mat S_j; 7651 PetscInt *used_xadj,*used_adjncy; 7652 PetscBool free_used_adj; 7653 PetscErrorCode ierr; 7654 7655 PetscFunctionBegin; 7656 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7657 free_used_adj = PETSC_FALSE; 7658 if (pcbddc->sub_schurs_layers == -1) { 7659 used_xadj = NULL; 7660 used_adjncy = NULL; 7661 } else { 7662 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7663 used_xadj = pcbddc->mat_graph->xadj; 7664 used_adjncy = pcbddc->mat_graph->adjncy; 7665 } else if (pcbddc->computed_rowadj) { 7666 used_xadj = pcbddc->mat_graph->xadj; 7667 used_adjncy = pcbddc->mat_graph->adjncy; 7668 } else { 7669 PetscBool flg_row=PETSC_FALSE; 7670 const PetscInt *xadj,*adjncy; 7671 PetscInt nvtxs; 7672 7673 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7674 if (flg_row) { 7675 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7676 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7677 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7678 free_used_adj = PETSC_TRUE; 7679 } else { 7680 pcbddc->sub_schurs_layers = -1; 7681 used_xadj = NULL; 7682 used_adjncy = NULL; 7683 } 7684 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7685 } 7686 } 7687 7688 /* setup sub_schurs data */ 7689 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7690 if (!sub_schurs->schur_explicit) { 7691 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7692 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7693 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7694 } else { 7695 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7696 PetscBool isseqaij,need_change = PETSC_FALSE; 7697 PetscInt benign_n; 7698 Mat change = NULL; 7699 Vec scaling = NULL; 7700 IS change_primal = NULL; 7701 7702 if (!pcbddc->use_vertices && reuse_solvers) { 7703 PetscInt n_vertices; 7704 7705 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7706 reuse_solvers = (PetscBool)!n_vertices; 7707 } 7708 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7709 if (!isseqaij) { 7710 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7711 if (matis->A == pcbddc->local_mat) { 7712 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7713 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7714 } else { 7715 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7716 } 7717 } 7718 if (!pcbddc->benign_change_explicit) { 7719 benign_n = pcbddc->benign_n; 7720 } else { 7721 benign_n = 0; 7722 } 7723 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7724 We need a global reduction to avoid possible deadlocks. 7725 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7726 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7727 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7728 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7729 need_change = (PetscBool)(!need_change); 7730 } 7731 /* If the user defines additional constraints, we import them here. 7732 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7733 if (need_change) { 7734 PC_IS *pcisf; 7735 PC_BDDC *pcbddcf; 7736 PC pcf; 7737 7738 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7739 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7740 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7741 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7742 /* hacks */ 7743 pcisf = (PC_IS*)pcf->data; 7744 pcisf->is_B_local = pcis->is_B_local; 7745 pcisf->vec1_N = pcis->vec1_N; 7746 pcisf->BtoNmap = pcis->BtoNmap; 7747 pcisf->n = pcis->n; 7748 pcisf->n_B = pcis->n_B; 7749 pcbddcf = (PC_BDDC*)pcf->data; 7750 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7751 pcbddcf->mat_graph = pcbddc->mat_graph; 7752 pcbddcf->use_faces = PETSC_TRUE; 7753 pcbddcf->use_change_of_basis = PETSC_TRUE; 7754 pcbddcf->use_change_on_faces = PETSC_TRUE; 7755 pcbddcf->use_qr_single = PETSC_TRUE; 7756 pcbddcf->fake_change = PETSC_TRUE; 7757 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7758 /* store information on primal vertices and change of basis (in local numbering) */ 7759 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7760 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7761 change = pcbddcf->ConstraintMatrix; 7762 pcbddcf->ConstraintMatrix = NULL; 7763 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7764 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7765 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7766 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7767 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7768 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7769 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7770 pcf->ops->destroy = NULL; 7771 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7772 } 7773 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7774 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7775 ierr = MatDestroy(&change);CHKERRQ(ierr); 7776 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7777 } 7778 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7779 7780 /* free adjacency */ 7781 if (free_used_adj) { 7782 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7783 } 7784 PetscFunctionReturn(0); 7785 } 7786 7787 #undef __FUNCT__ 7788 #define __FUNCT__ "PCBDDCInitSubSchurs" 7789 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7790 { 7791 PC_IS *pcis=(PC_IS*)pc->data; 7792 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7793 PCBDDCGraph graph; 7794 PetscErrorCode ierr; 7795 7796 PetscFunctionBegin; 7797 /* attach interface graph for determining subsets */ 7798 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7799 IS verticesIS,verticescomm; 7800 PetscInt vsize,*idxs; 7801 7802 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7803 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7804 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7805 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7806 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7807 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7808 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7809 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7810 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7811 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7812 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7813 } else { 7814 graph = pcbddc->mat_graph; 7815 } 7816 /* print some info */ 7817 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 7818 IS vertices; 7819 PetscInt nv,nedges,nfaces; 7820 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7821 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7822 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7823 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7824 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7825 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7826 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7827 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7828 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7829 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7830 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7831 } 7832 7833 /* sub_schurs init */ 7834 if (!pcbddc->sub_schurs) { 7835 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7836 } 7837 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 7838 7839 /* free graph struct */ 7840 if (pcbddc->sub_schurs_rebuild) { 7841 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7842 } 7843 PetscFunctionReturn(0); 7844 } 7845 7846 #undef __FUNCT__ 7847 #define __FUNCT__ "PCBDDCCheckOperator" 7848 PetscErrorCode PCBDDCCheckOperator(PC pc) 7849 { 7850 PC_IS *pcis=(PC_IS*)pc->data; 7851 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7852 PetscErrorCode ierr; 7853 7854 PetscFunctionBegin; 7855 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7856 IS zerodiag = NULL; 7857 Mat S_j,B0_B=NULL; 7858 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7859 PetscScalar *p0_check,*array,*array2; 7860 PetscReal norm; 7861 PetscInt i; 7862 7863 /* B0 and B0_B */ 7864 if (zerodiag) { 7865 IS dummy; 7866 7867 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7868 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7869 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7870 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7871 } 7872 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7873 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7874 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7875 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7876 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7877 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7878 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7879 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7880 /* S_j */ 7881 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7882 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7883 7884 /* mimic vector in \widetilde{W}_\Gamma */ 7885 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7886 /* continuous in primal space */ 7887 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7888 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7889 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7890 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7891 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7892 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7893 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7894 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7895 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7896 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7897 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7898 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7899 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7900 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7901 7902 /* assemble rhs for coarse problem */ 7903 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7904 /* local with Schur */ 7905 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7906 if (zerodiag) { 7907 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7908 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7909 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7910 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7911 } 7912 /* sum on primal nodes the local contributions */ 7913 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7914 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7915 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7916 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7917 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7918 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7919 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7920 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7921 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7922 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7923 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7924 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7925 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7926 /* scale primal nodes (BDDC sums contibutions) */ 7927 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7928 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7929 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7930 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7931 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7932 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7933 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7934 /* global: \widetilde{B0}_B w_\Gamma */ 7935 if (zerodiag) { 7936 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7937 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7938 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7939 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7940 } 7941 /* BDDC */ 7942 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7943 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7944 7945 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7946 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7947 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7948 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7949 for (i=0;i<pcbddc->benign_n;i++) { 7950 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7951 } 7952 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7953 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7954 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7955 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7956 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7957 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7958 } 7959 PetscFunctionReturn(0); 7960 } 7961