1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 if (!esize) PetscFunctionReturn(0); 84 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 85 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 86 87 /* gradients */ 88 ptr = work + 5*esize; 89 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 90 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 91 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 92 ierr = MatDestroy(&GE);CHKERRQ(ierr); 93 94 /* constants */ 95 ptr += rsize*csize; 96 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 97 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 99 ierr = MatDestroy(&GE);CHKERRQ(ierr); 100 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 101 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "PCBDDCNedelecSupport" 107 PetscErrorCode PCBDDCNedelecSupport(PC pc) 108 { 109 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 110 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 111 Mat G,T,conn,lG,lGt,lGis,lGall,lGe; 112 MatNullSpace nnsp; 113 Vec tvec,*quads; 114 PetscSF sfv; 115 ISLocalToGlobalMapping el2g,vl2g,fl2g; 116 MPI_Comm comm; 117 IS lned,primals,allprimals,nedfieldlocal; 118 IS *eedges,*extrows,*extcols,*alleedges; 119 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee; 120 PetscScalar *vals,*work; 121 PetscReal *rwork; 122 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 123 PetscInt ne,nv,Lv,order,n,field; 124 PetscInt n_neigh,*neigh,*n_shared,**shared; 125 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscBool print,eerr,done,lrc[2],conforming; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* test variable order code and print debug info TODO: to be removed */ 133 print = PETSC_FALSE; 134 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 135 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 136 137 /* Return to caller if there are no edges in the decomposition */ 138 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 139 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 140 ierr = ISLocalToGlobalMappingGetSize(el2g,&n);CHKERRQ(ierr); 141 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 142 lrc[0] = PETSC_FALSE; 143 for (i=0;i<n;i++) { 144 if (PetscRealPart(vals[i]) > 2.) { 145 lrc[0] = PETSC_TRUE; 146 break; 147 } 148 } 149 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 150 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 151 if (!lrc[1]) PetscFunctionReturn(0); 152 153 /* Get discrete gradient 154 If it is defined for a subset of dofs, assumes G is given in global ordering for all the dofs */ 155 G = pcbddc->discretegradient; 156 order = pcbddc->nedorder; 157 conforming = pcbddc->conforming; 158 field = pcbddc->nedfield; 159 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 160 if (pcbddc->n_ISForDofsLocal && field > -1) { 161 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 162 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 163 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 164 } else { 165 PetscBool testnedfield = PETSC_FALSE; 166 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 167 if (!testnedfield) { 168 ne = n; 169 nedfieldlocal = NULL; 170 } else { 171 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 172 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 173 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 174 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 175 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 176 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 177 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 178 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 179 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 180 for (i=0,cum=0;i<n;i++) { 181 if (matis->sf_leafdata[i] > 1) { 182 matis->sf_leafdata[cum++] = i; 183 } 184 } 185 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 186 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 187 } 188 } 189 190 if (nedfieldlocal) { /* merge with previous code when testing is done */ 191 IS is; 192 193 /* need to map from the local Nedelec field to local numbering */ 194 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 195 /* need to map from the local Nedelec field to global numbering */ 196 ierr = ISLocalToGlobalMappingApplyIS(el2g,nedfieldlocal,&is);CHKERRQ(ierr); 197 ierr = ISLocalToGlobalMappingCreateIS(is,&el2g);CHKERRQ(ierr); 198 ierr = ISDestroy(&is);CHKERRQ(ierr); 199 } else { 200 ierr = PetscObjectReference((PetscObject)el2g);CHKERRQ(ierr); 201 fl2g = NULL; 202 } 203 204 /* Sanity checks */ 205 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 206 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 207 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 208 209 /* Drop connections for interior edges (this modifies G) */ 210 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 211 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 212 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 213 if (nedfieldlocal) { 214 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 215 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 216 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 217 } else { 218 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 219 } 220 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 221 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 222 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 223 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 224 if (matis->sf_rootdata[i] < 2) { 225 matis->sf_rootdata[cum++] = i + rst; 226 } 227 } 228 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 229 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 230 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 231 232 /* Extract subdomain relevant rows of G */ 233 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 234 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 235 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 236 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 237 ierr = ISDestroy(&lned);CHKERRQ(ierr); 238 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 239 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 240 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 241 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 242 if (print) { 243 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 244 ierr = MatView(lG,NULL);CHKERRQ(ierr); 245 } 246 247 /* SF for nodal communications */ 248 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 249 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 250 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 251 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 252 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 253 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 254 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 257 258 /* Destroy temporary G created in MATIS format */ 259 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 260 261 /* Analyze the edge-nodes connections (duplicate lG) */ 262 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 263 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 264 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 265 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 266 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 267 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 268 /* need to import the boundary specification to ensure the 269 proper detection of coarse edges' endpoints */ 270 if (pcbddc->DirichletBoundariesLocal) { 271 IS is; 272 273 if (fl2g) { 274 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 275 } else { 276 is = pcbddc->DirichletBoundariesLocal; 277 } 278 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 279 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 280 for (i=0;i<cum;i++) { 281 if (idxs[i] >= 0) { 282 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 283 } 284 } 285 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 286 if (fl2g) { 287 ierr = ISDestroy(&is);CHKERRQ(ierr); 288 } 289 } 290 if (pcbddc->NeumannBoundariesLocal) { 291 IS is; 292 293 if (fl2g) { 294 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 295 } else { 296 is = pcbddc->NeumannBoundariesLocal; 297 } 298 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 299 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 300 for (i=0;i<cum;i++) { 301 if (idxs[i] >= 0) { 302 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 303 } 304 } 305 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 306 if (fl2g) { 307 ierr = ISDestroy(&is);CHKERRQ(ierr); 308 } 309 } 310 311 /* need to remove coarse faces' dofs to ensure the 312 proper detection of coarse edges' endpoints */ 313 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 314 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 315 for (i=1;i<n_neigh;i++) { 316 PetscInt j; 317 for (j=0;j<n_shared[i];j++) marks[shared[i][j]]++; 318 } 319 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 320 cum = 0; 321 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 322 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) { 324 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 325 if (!marks[i] || (marks[i] == 1 && !PetscBTLookup(btb,i))) { 326 marks[cum++] = i; 327 continue; 328 } 329 /* set badly connected edge dofs as primal */ 330 if (!conforming && ii[i+1]-ii[i] != order + 1) { 331 marks[cum++] = i; 332 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 333 for (j=ii[i];j<ii[i+1];j++) { 334 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 335 } 336 } 337 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 338 if (!order && ii[i+1] != ii[i]) { 339 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 340 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 341 } 342 } 343 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 344 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 345 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 346 /* identify splitpoints and corner candidates: TODO variable order */ 347 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 348 if (print) { 349 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 350 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 351 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 352 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 353 } 354 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 355 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 356 for (i=0;i<nv;i++) { 357 PetscInt ord = order, test = ii[i+1]-ii[i]; 358 if (!order) { 359 PetscReal vorder = 0.; 360 361 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 362 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 363 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 364 ord = 1; 365 } 366 #if defined(PETSC_USE_DEBUG) 367 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 368 #endif 369 if (test >= 3*ord) { /* splitpoints */ 370 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 371 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 372 } else if (test == ord) { 373 if (order == 1) { 374 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 375 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 376 } else { 377 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 378 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 379 } 380 } 381 } 382 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 383 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 384 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 385 386 /* Get the local G^T explicitly */ 387 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 388 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 389 if (print) { 390 ierr = PetscObjectSetName((PetscObject)lGt,"initial_lGt");CHKERRQ(ierr); 391 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 392 } 393 394 /* Mark interior nodal dofs */ 395 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 396 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 397 for (i=1;i<n_neigh;i++) { 398 for (j=0;j<n_shared[i];j++) { 399 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 400 } 401 } 402 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 403 404 /* communicate corners and splitpoints */ 405 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 406 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 407 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 408 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 409 410 if (print) { 411 IS tbz; 412 413 cum = 0; 414 for (i=0;i<nv;i++) 415 if (sfvleaves[i]) 416 vmarks[cum++] = i; 417 418 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 419 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 420 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 421 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 422 } 423 424 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 425 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 426 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 427 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 428 429 /* Zero rows of lGt corresponding to identified corners 430 and interior nodal dofs */ 431 cum = 0; 432 for (i=0;i<nv;i++) { 433 if (sfvleaves[i]) { 434 vmarks[cum++] = i; 435 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 436 } 437 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 438 } 439 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 440 if (print) { 441 IS tbz; 442 443 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 444 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 445 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 446 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 447 } 448 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 449 ierr = PetscFree(vmarks);CHKERRQ(ierr); 450 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 451 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 452 453 /* Recompute G */ 454 ierr = MatDestroy(&lG);CHKERRQ(ierr); 455 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 456 if (print) { 457 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 458 ierr = MatView(lG,NULL);CHKERRQ(ierr); 459 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 460 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 461 } 462 463 /* Get primal dofs (if any) */ 464 cum = 0; 465 for (i=0;i<ne;i++) { 466 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 467 } 468 if (fl2g) { 469 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 470 } 471 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 472 if (print) { 473 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 474 ierr = ISView(primals,NULL);CHKERRQ(ierr); 475 } 476 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 477 /* TODO: what if the user passed in some of them ? */ 478 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 479 ierr = ISDestroy(&primals);CHKERRQ(ierr); 480 481 /* Compute edge connectivity */ 482 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 483 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 484 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 485 if (done && i && ii && jj) { /* when lG is empty, don't pass pointers */ 486 if (fl2g) { 487 PetscBT btf; 488 PetscInt *iia,*jja,*iiu,*jju; 489 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 490 491 /* create CSR for all local dofs */ 492 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 493 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 494 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 495 iiu = pcbddc->mat_graph->xadj; 496 jju = pcbddc->mat_graph->adjncy; 497 } else if (pcbddc->use_local_adj) { 498 rest = PETSC_TRUE; 499 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 500 } else { 501 free = PETSC_TRUE; 502 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 503 iiu[0] = 0; 504 for (i=0;i<n;i++) { 505 iiu[i+1] = i+1; 506 jju[i] = -1; 507 } 508 } 509 510 /* import sizes of CSR */ 511 iia[0] = 0; 512 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 513 514 /* overwrite entries corresponding to the Nedelec field */ 515 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 516 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 517 for (i=0;i<ne;i++) { 518 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 519 iia[idxs[i]+1] = ii[i+1]-ii[i]; 520 } 521 522 /* iia in CSR */ 523 for (i=0;i<n;i++) iia[i+1] += iia[i]; 524 525 /* jja in CSR */ 526 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 527 for (i=0;i<n;i++) 528 if (!PetscBTLookup(btf,i)) 529 for (j=0;j<iiu[i+1]-iiu[i];j++) 530 jja[iia[i]+j] = jju[iiu[i]+j]; 531 532 /* map edge dofs connectivity */ 533 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 534 for (i=0;i<ne;i++) { 535 PetscInt e = idxs[i]; 536 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 537 } 538 539 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 540 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 541 if (rest) { 542 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 543 } 544 if (free) { 545 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 546 } 547 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 548 } else { 549 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 550 } 551 } 552 553 /* Analyze interface for edge dofs */ 554 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 555 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 556 557 /* Get coarse edges in the edge space */ 558 pcbddc->mat_graph->twodim = PETSC_FALSE; 559 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 560 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 561 562 if (fl2g) { 563 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 564 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 565 for (i=0;i<nee;i++) { 566 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 567 } 568 } else { 569 eedges = alleedges; 570 primals = allprimals; 571 } 572 573 /* Mark fine edge dofs with their coarse edge id */ 574 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 575 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 576 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 577 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 578 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 579 if (print) { 580 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 581 ierr = ISView(primals,NULL);CHKERRQ(ierr); 582 } 583 584 maxsize = 0; 585 for (i=0;i<nee;i++) { 586 PetscInt size,mark = i+1; 587 588 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 589 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 590 for (j=0;j<size;j++) marks[idxs[j]] = mark; 591 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 592 maxsize = PetscMax(maxsize,size); 593 } 594 595 /* Find coarse edge endpoints */ 596 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 597 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 598 for (i=0;i<nee;i++) { 599 PetscInt mark = i+1,size; 600 601 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 602 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 603 if (print) { 604 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 605 ISView(eedges[i],NULL); 606 } 607 for (j=0;j<size;j++) { 608 PetscInt k, ee = idxs[j]; 609 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 610 for (k=ii[ee];k<ii[ee+1];k++) { 611 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 612 if (PetscBTLookup(btv,jj[k])) { 613 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 614 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 615 if (print) PetscPrintf(PETSC_COMM_SELF," candidate? %d\n",(PetscBool)(PetscBTLookup(btvcand,jj[k]))); 616 PetscInt k2; 617 PetscBool corner = PETSC_FALSE; 618 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 619 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 620 /* it's a corner if either is connected with an edge dof belonging to a different cc or 621 if the edge dof lie on the natural part of the boundary */ 622 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 623 corner = PETSC_TRUE; 624 break; 625 } 626 } 627 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 628 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 629 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 630 } else { 631 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 632 } 633 } 634 } 635 } 636 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 637 } 638 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 639 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 640 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 641 642 /* Reset marked primal dofs */ 643 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 644 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 645 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 646 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 647 648 /* Compute extended cols indices */ 649 ierr = MatGetRowIJ(lG ,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 650 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 651 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 652 i *= maxsize; 653 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 654 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 655 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 656 eerr = PETSC_FALSE; 657 for (i=0;i<nee;i++) { 658 PetscInt size; 659 660 cum = 0; 661 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 662 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 663 for (j=0;j<size;j++) { 664 PetscInt k,ee = idxs[j]; 665 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 666 } 667 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 668 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 669 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 670 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 671 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 672 /* it may happen that endpoints are not defined at this point 673 if it is the case, mark this edge for a second pass */ 674 if (cum != size -1) { 675 emarks[i] = 1; 676 if (print) { 677 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 678 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 679 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 680 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 681 } 682 eerr = PETSC_TRUE; 683 } 684 } 685 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 686 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 687 if (done) { 688 PetscInt *newprimals; 689 690 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 691 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 692 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 693 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 695 for (i=0;i<nee;i++) { 696 if (emarks[i]) { 697 PetscInt size,mark = i+1; 698 699 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 700 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 701 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 702 for (j=0;j<size;j++) { 703 PetscInt k,ee = idxs[j]; 704 for (k=ii[ee];k<ii[ee+1];k++) { 705 /* set all candidates located on the edge as corners */ 706 if (PetscBTLookup(btvcand,jj[k])) { 707 PetscInt k2,vv = jj[k]; 708 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 709 /* set all edge dofs connected to candidate as primals */ 710 for (k2=iit[vv];k2<iit[vv+1];k2++) { 711 if (marks[jjt[k2]] == mark) { 712 PetscInt k3,ee2 = jjt[k2]; 713 newprimals[cum++] = ee2; 714 /* finally set the new corners */ 715 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 716 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 717 } 718 } 719 } 720 } 721 } 722 } 723 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 724 } 725 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 726 } 727 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 728 if (fl2g) { 729 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 730 ierr = ISDestroy(&primals);CHKERRQ(ierr); 731 for (i=0;i<nee;i++) { 732 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 733 } 734 ierr = PetscFree(eedges);CHKERRQ(ierr); 735 } 736 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 737 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 738 ierr = PetscFree(newprimals);CHKERRQ(ierr); 739 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 740 ierr = ISDestroy(&primals);CHKERRQ(ierr); 741 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 742 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 743 if (fl2g) { 744 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 745 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 746 for (i=0;i<nee;i++) { 747 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 748 } 749 } else { 750 eedges = alleedges; 751 primals = allprimals; 752 } 753 754 /* Mark again */ 755 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 756 for (i=0;i<nee;i++) { 757 PetscInt size,mark = i+1; 758 759 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 760 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 761 for (j=0;j<size;j++) marks[idxs[j]] = mark; 762 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 763 } 764 if (print) { 765 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 766 ierr = ISView(primals,NULL);CHKERRQ(ierr); 767 } 768 769 /* Recompute extended cols */ 770 eerr = PETSC_FALSE; 771 for (i=0;i<nee;i++) { 772 PetscInt size; 773 774 cum = 0; 775 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 776 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 777 for (j=0;j<size;j++) { 778 PetscInt k,ee = idxs[j]; 779 for (k=ii[ee];k<ii[ee+1];k++) { 780 if (!PetscBTLookup(btv,jj[k])) { 781 extrow[cum++] = jj[k]; 782 } 783 } 784 } 785 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 786 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 787 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 788 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 789 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 790 if (cum != size -1) { 791 if (print) { 792 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 793 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 794 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 795 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 796 } 797 eerr = PETSC_TRUE; 798 } 799 } 800 } 801 ierr = MatRestoreRowIJ( lG,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 802 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 803 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 804 ierr = PetscFree(emarks);CHKERRQ(ierr); 805 /* an error should not occur at this point */ 806 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 807 808 /* Check the number of endpoints */ 809 /* TODO: fix case for circular edge */ 810 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 811 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 812 for (i=0;i<nee;i++) { 813 PetscInt size, found = 0; 814 815 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 816 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 817 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 818 for (j=0;j<size;j++) { 819 PetscInt k,ee = idxs[j]; 820 for (k=ii[ee];k<ii[ee+1];k++) { 821 PetscInt vv = jj[k]; 822 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 823 found++; 824 } 825 } 826 } 827 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 828 if (found != 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d\n",found,i); 829 } 830 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 831 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 832 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 833 834 #if defined(PETSC_USE_DEBUG) 835 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 836 not interfere with neighbouring coarse edges */ 837 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 838 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 839 for (i=0;i<nv;i++) { 840 PetscInt emax = 0,eemax = 0; 841 842 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 843 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 844 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 845 for (j=1;j<nee+1;j++) { 846 if (emax < emarks[j]) { 847 emax = emarks[j]; 848 eemax = j; 849 } 850 } 851 /* not relevant for edges */ 852 if (!eemax) continue; 853 854 for (j=ii[i];j<ii[i+1];j++) { 855 if (marks[jj[j]] && marks[jj[j]] != eemax) { 856 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 857 } 858 } 859 } 860 ierr = PetscFree(emarks);CHKERRQ(ierr); 861 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 862 #endif 863 864 /* Compute extended rows indices for edge blocks of the change of basis */ 865 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 866 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 867 extmem *= maxsize; 868 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 869 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 870 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 871 for (i=0;i<nv;i++) { 872 PetscInt mark = 0,size,start; 873 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 874 for (j=ii[i];j<ii[i+1];j++) 875 if (marks[jj[j]] && !mark) 876 mark = marks[jj[j]]; 877 878 /* not relevant */ 879 if (!mark) continue; 880 881 /* import extended row */ 882 mark--; 883 start = mark*extmem+extrowcum[mark]; 884 size = ii[i+1]-ii[i]; 885 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 886 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 887 extrowcum[mark] += size; 888 } 889 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 890 cum = 0; 891 for (i=0;i<nee;i++) { 892 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 893 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 894 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 895 cum = PetscMax(cum,size); 896 } 897 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 898 ierr = PetscFree(marks);CHKERRQ(ierr); 899 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 900 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 901 902 /* Workspace for lapack inner calls and VecSetValues */ 903 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 904 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 905 for (i=0;i<maxsize;i++) vals[i] = 1.; 906 907 /* Create vectors for quadrature rules */ 908 /* TODO preserve other quadratures */ 909 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 910 for (i=0;i<nquads;i++) { 911 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 912 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 913 } 914 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 915 916 /* Create change of basis matrix (preallocation can be improved) */ 917 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 918 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 919 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 920 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 921 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 922 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 923 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 924 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 925 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 926 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 927 928 /* Defaults to identity */ 929 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 930 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 931 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 932 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 933 934 for (i=0;i<nee;i++) { 935 Mat Gins = NULL, GKins = NULL; 936 937 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 938 if (Gins && GKins) { 939 PetscScalar *data; 940 const PetscInt *rows,*cols; 941 PetscInt nrh,nch,nrc,ncc; 942 943 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 944 /* H1 */ 945 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 946 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 947 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 948 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 949 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 950 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 951 /* complement */ 952 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 953 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 954 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 955 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 956 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 957 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 958 /* Gins kernel quadratures */ 959 for (j=0;j<ncc;j++) { 960 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 961 } 962 /* H1 average */ 963 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 964 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 965 } 966 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 967 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 968 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 969 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 970 } 971 972 /* Start assembling */ 973 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 974 for (i=0;i<nquads;i++) { 975 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 976 } 977 978 /* Free */ 979 if (fl2g) { 980 ierr = ISDestroy(&primals);CHKERRQ(ierr); 981 for (i=0;i<nee;i++) { 982 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 983 } 984 ierr = PetscFree(eedges);CHKERRQ(ierr); 985 } 986 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 987 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 988 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 989 ierr = PetscFree(extrow);CHKERRQ(ierr); 990 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 991 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 992 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 993 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 994 ierr = PetscFree(vals);CHKERRQ(ierr); 995 ierr = PetscFree(extrows);CHKERRQ(ierr); 996 ierr = PetscFree(extcols);CHKERRQ(ierr); 997 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 998 ierr = MatDestroy(&lG);CHKERRQ(ierr); 999 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1000 1001 /* Complete assembling */ 1002 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 for (i=0;i<nquads;i++) { 1004 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1005 } 1006 for (i=0;i<nquads;i++) { 1007 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1008 } 1009 ierr = PetscFree(quads);CHKERRQ(ierr); 1010 1011 /* tell PCBDDC the topography has been analyzed */ 1012 pcbddc->recompute_topography = PETSC_FALSE; 1013 1014 /* set change of basis */ 1015 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1016 ierr = MatDestroy(&T);CHKERRQ(ierr); 1017 1018 /* set quadratures */ 1019 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1020 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1021 1022 PetscFunctionReturn(0); 1023 } 1024 1025 /* the near-null space of BDDC carries information on quadrature weights, 1026 and these can be collinear -> so cheat with MatNullSpaceCreate 1027 and create a suitable set of basis vectors first */ 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1030 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1031 { 1032 PetscErrorCode ierr; 1033 PetscInt i; 1034 1035 PetscFunctionBegin; 1036 for (i=0;i<nvecs;i++) { 1037 PetscInt first,last; 1038 1039 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1040 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1041 if (i>=first && i < last) { 1042 PetscScalar *data; 1043 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1044 if (!has_const) { 1045 data[i-first] = 1.; 1046 } else { 1047 data[2*i-first] = 1./PetscSqrtReal(2.); 1048 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1049 } 1050 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1051 } 1052 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1053 } 1054 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1055 for (i=0;i<nvecs;i++) { /* reset vectors */ 1056 PetscInt first,last; 1057 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1058 if (i>=first && i < last) { 1059 PetscScalar *data; 1060 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1061 if (!has_const) { 1062 data[i-first] = 0.; 1063 } else { 1064 data[2*i-first] = 0.; 1065 data[2*i-first+1] = 0.; 1066 } 1067 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1068 } 1069 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1070 } 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1076 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1077 { 1078 Mat loc_divudotp; 1079 Vec p,v,vins,quad_vec,*quad_vecs; 1080 ISLocalToGlobalMapping map; 1081 IS *faces,*edges; 1082 PetscScalar *vals; 1083 const PetscScalar *array; 1084 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1085 PetscMPIInt rank; 1086 PetscErrorCode ierr; 1087 1088 PetscFunctionBegin; 1089 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1090 if (graph->twodim) { 1091 lmaxneighs = 2; 1092 } else { 1093 lmaxneighs = 1; 1094 for (i=0;i<ne;i++) { 1095 const PetscInt *idxs; 1096 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1097 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1098 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1099 } 1100 lmaxneighs++; /* graph count does not include self */ 1101 } 1102 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1103 maxsize = 0; 1104 for (i=0;i<ne;i++) { 1105 PetscInt nn; 1106 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1107 maxsize = PetscMax(maxsize,nn); 1108 } 1109 for (i=0;i<nf;i++) { 1110 PetscInt nn; 1111 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1112 maxsize = PetscMax(maxsize,nn); 1113 } 1114 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1115 /* create vectors to hold quadrature weights */ 1116 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1117 if (!transpose) { 1118 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1119 } else { 1120 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1121 } 1122 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1123 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1124 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1125 for (i=0;i<maxneighs;i++) { 1126 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1127 } 1128 1129 /* compute local quad vec */ 1130 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1131 if (!transpose) { 1132 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1133 } else { 1134 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1135 } 1136 ierr = VecSet(p,1.);CHKERRQ(ierr); 1137 if (!transpose) { 1138 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1139 } else { 1140 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1141 } 1142 if (vl2l) { 1143 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1144 } else { 1145 vins = v; 1146 } 1147 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1148 ierr = VecDestroy(&p);CHKERRQ(ierr); 1149 1150 /* insert in global quadrature vecs */ 1151 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1152 for (i=0;i<nf;i++) { 1153 const PetscInt *idxs; 1154 PetscInt idx,nn,j; 1155 1156 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1157 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1158 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1159 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1160 idx = -(idx+1); 1161 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1162 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1163 } 1164 for (i=0;i<ne;i++) { 1165 const PetscInt *idxs; 1166 PetscInt idx,nn,j; 1167 1168 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1169 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1170 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1171 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1172 idx = -(idx+1); 1173 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1174 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1175 } 1176 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1177 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1178 if (vl2l) { 1179 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1180 } 1181 ierr = VecDestroy(&v);CHKERRQ(ierr); 1182 ierr = PetscFree(vals);CHKERRQ(ierr); 1183 1184 /* assemble near null space */ 1185 for (i=0;i<maxneighs;i++) { 1186 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1187 } 1188 for (i=0;i<maxneighs;i++) { 1189 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1190 } 1191 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1198 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1199 { 1200 PetscErrorCode ierr; 1201 Vec local,global; 1202 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1203 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1204 1205 PetscFunctionBegin; 1206 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1207 /* need to convert from global to local topology information and remove references to information in global ordering */ 1208 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1209 if (pcbddc->user_provided_isfordofs) { 1210 if (pcbddc->n_ISForDofs) { 1211 PetscInt i; 1212 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1213 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1214 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1215 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1216 } 1217 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1218 pcbddc->n_ISForDofs = 0; 1219 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1220 } 1221 } else { 1222 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1223 PetscInt i, n = matis->A->rmap->n; 1224 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1225 if (i > 1) { 1226 pcbddc->n_ISForDofsLocal = i; 1227 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1228 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1229 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1230 } 1231 } 1232 } 1233 } 1234 1235 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1236 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1237 } 1238 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1239 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1240 } 1241 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1242 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1243 } 1244 ierr = VecDestroy(&global);CHKERRQ(ierr); 1245 ierr = VecDestroy(&local);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1251 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1252 { 1253 PC_IS *pcis = (PC_IS*)(pc->data); 1254 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1255 PetscErrorCode ierr; 1256 1257 PetscFunctionBegin; 1258 if (!pcbddc->benign_have_null) { 1259 PetscFunctionReturn(0); 1260 } 1261 if (pcbddc->ChangeOfBasisMatrix) { 1262 Vec swap; 1263 1264 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1265 swap = pcbddc->work_change; 1266 pcbddc->work_change = r; 1267 r = swap; 1268 } 1269 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1272 ierr = VecSet(z,0.);CHKERRQ(ierr); 1273 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1274 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1275 if (pcbddc->ChangeOfBasisMatrix) { 1276 Vec swap; 1277 1278 swap = r; 1279 r = pcbddc->work_change; 1280 pcbddc->work_change = swap; 1281 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1282 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1283 } 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1289 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1290 { 1291 PCBDDCBenignMatMult_ctx ctx; 1292 PetscErrorCode ierr; 1293 PetscBool apply_right,apply_left,reset_x; 1294 1295 PetscFunctionBegin; 1296 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1297 if (transpose) { 1298 apply_right = ctx->apply_left; 1299 apply_left = ctx->apply_right; 1300 } else { 1301 apply_right = ctx->apply_right; 1302 apply_left = ctx->apply_left; 1303 } 1304 reset_x = PETSC_FALSE; 1305 if (apply_right) { 1306 const PetscScalar *ax; 1307 PetscInt nl,i; 1308 1309 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1310 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1311 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1312 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1313 for (i=0;i<ctx->benign_n;i++) { 1314 PetscScalar sum,val; 1315 const PetscInt *idxs; 1316 PetscInt nz,j; 1317 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1318 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1319 sum = 0.; 1320 if (ctx->apply_p0) { 1321 val = ctx->work[idxs[nz-1]]; 1322 for (j=0;j<nz-1;j++) { 1323 sum += ctx->work[idxs[j]]; 1324 ctx->work[idxs[j]] += val; 1325 } 1326 } else { 1327 for (j=0;j<nz-1;j++) { 1328 sum += ctx->work[idxs[j]]; 1329 } 1330 } 1331 ctx->work[idxs[nz-1]] -= sum; 1332 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1333 } 1334 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1335 reset_x = PETSC_TRUE; 1336 } 1337 if (transpose) { 1338 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1339 } else { 1340 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1341 } 1342 if (reset_x) { 1343 ierr = VecResetArray(x);CHKERRQ(ierr); 1344 } 1345 if (apply_left) { 1346 PetscScalar *ay; 1347 PetscInt i; 1348 1349 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1350 for (i=0;i<ctx->benign_n;i++) { 1351 PetscScalar sum,val; 1352 const PetscInt *idxs; 1353 PetscInt nz,j; 1354 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1355 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1356 val = -ay[idxs[nz-1]]; 1357 if (ctx->apply_p0) { 1358 sum = 0.; 1359 for (j=0;j<nz-1;j++) { 1360 sum += ay[idxs[j]]; 1361 ay[idxs[j]] += val; 1362 } 1363 ay[idxs[nz-1]] += sum; 1364 } else { 1365 for (j=0;j<nz-1;j++) { 1366 ay[idxs[j]] += val; 1367 } 1368 ay[idxs[nz-1]] = 0.; 1369 } 1370 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1371 } 1372 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1373 } 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1379 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1380 { 1381 PetscErrorCode ierr; 1382 1383 PetscFunctionBegin; 1384 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1390 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1391 { 1392 PetscErrorCode ierr; 1393 1394 PetscFunctionBegin; 1395 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "PCBDDCBenignShellMat" 1401 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1402 { 1403 PC_IS *pcis = (PC_IS*)pc->data; 1404 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1405 PCBDDCBenignMatMult_ctx ctx; 1406 PetscErrorCode ierr; 1407 1408 PetscFunctionBegin; 1409 if (!restore) { 1410 Mat A_IB,A_BI; 1411 PetscScalar *work; 1412 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1413 1414 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1415 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1416 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1417 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1418 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1419 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1420 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1421 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1422 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1423 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1424 ctx->apply_left = PETSC_TRUE; 1425 ctx->apply_right = PETSC_FALSE; 1426 ctx->apply_p0 = PETSC_FALSE; 1427 ctx->benign_n = pcbddc->benign_n; 1428 if (reuse) { 1429 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1430 ctx->free = PETSC_FALSE; 1431 } else { /* TODO: could be optimized for successive solves */ 1432 ISLocalToGlobalMapping N_to_D; 1433 PetscInt i; 1434 1435 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1436 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1437 for (i=0;i<pcbddc->benign_n;i++) { 1438 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1439 } 1440 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1441 ctx->free = PETSC_TRUE; 1442 } 1443 ctx->A = pcis->A_IB; 1444 ctx->work = work; 1445 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1446 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 pcis->A_IB = A_IB; 1449 1450 /* A_BI as A_IB^T */ 1451 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1452 pcbddc->benign_original_mat = pcis->A_BI; 1453 pcis->A_BI = A_BI; 1454 } else { 1455 if (!pcbddc->benign_original_mat) { 1456 PetscFunctionReturn(0); 1457 } 1458 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1459 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1460 pcis->A_IB = ctx->A; 1461 ctx->A = NULL; 1462 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1463 pcis->A_BI = pcbddc->benign_original_mat; 1464 pcbddc->benign_original_mat = NULL; 1465 if (ctx->free) { 1466 PetscInt i; 1467 for (i=0;i<ctx->benign_n;i++) { 1468 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1469 } 1470 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1471 } 1472 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1473 ierr = PetscFree(ctx);CHKERRQ(ierr); 1474 } 1475 PetscFunctionReturn(0); 1476 } 1477 1478 /* used just in bddc debug mode */ 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "PCBDDCBenignProject" 1481 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1482 { 1483 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1484 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1485 Mat An; 1486 PetscErrorCode ierr; 1487 1488 PetscFunctionBegin; 1489 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1490 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1491 if (is1) { 1492 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1493 ierr = MatDestroy(&An);CHKERRQ(ierr); 1494 } else { 1495 *B = An; 1496 } 1497 PetscFunctionReturn(0); 1498 } 1499 1500 /* TODO: add reuse flag */ 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatSeqAIJCompress" 1503 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1504 { 1505 Mat Bt; 1506 PetscScalar *a,*bdata; 1507 const PetscInt *ii,*ij; 1508 PetscInt m,n,i,nnz,*bii,*bij; 1509 PetscBool flg_row; 1510 PetscErrorCode ierr; 1511 1512 PetscFunctionBegin; 1513 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1514 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1515 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1516 nnz = n; 1517 for (i=0;i<ii[n];i++) { 1518 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1519 } 1520 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1521 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1522 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1523 nnz = 0; 1524 bii[0] = 0; 1525 for (i=0;i<n;i++) { 1526 PetscInt j; 1527 for (j=ii[i];j<ii[i+1];j++) { 1528 PetscScalar entry = a[j]; 1529 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1530 bij[nnz] = ij[j]; 1531 bdata[nnz] = entry; 1532 nnz++; 1533 } 1534 } 1535 bii[i+1] = nnz; 1536 } 1537 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1538 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1539 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1540 { 1541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1542 b->free_a = PETSC_TRUE; 1543 b->free_ij = PETSC_TRUE; 1544 } 1545 *B = Bt; 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatDetectDisconnectedComponents" 1551 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1552 { 1553 Mat B; 1554 IS is_dummy,*cc_n; 1555 ISLocalToGlobalMapping l2gmap_dummy; 1556 PCBDDCGraph graph; 1557 PetscInt i,n; 1558 PetscInt *xadj,*adjncy; 1559 PetscInt *xadj_filtered,*adjncy_filtered; 1560 PetscBool flg_row,isseqaij; 1561 PetscErrorCode ierr; 1562 1563 PetscFunctionBegin; 1564 if (!A->rmap->N || !A->cmap->N) { 1565 *ncc = 0; 1566 *cc = NULL; 1567 PetscFunctionReturn(0); 1568 } 1569 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1570 if (!isseqaij && filter) { 1571 PetscBool isseqdense; 1572 1573 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1574 if (!isseqdense) { 1575 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1576 } else { /* TODO: rectangular case and LDA */ 1577 PetscScalar *array; 1578 PetscReal chop=1.e-6; 1579 1580 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1581 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1582 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1583 for (i=0;i<n;i++) { 1584 PetscInt j; 1585 for (j=i+1;j<n;j++) { 1586 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1587 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1588 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1589 } 1590 } 1591 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1592 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1593 } 1594 } else { 1595 B = A; 1596 } 1597 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1598 1599 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1600 if (filter) { 1601 PetscScalar *data; 1602 PetscInt j,cum; 1603 1604 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1605 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1606 cum = 0; 1607 for (i=0;i<n;i++) { 1608 PetscInt t; 1609 1610 for (j=xadj[i];j<xadj[i+1];j++) { 1611 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1612 continue; 1613 } 1614 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1615 } 1616 t = xadj_filtered[i]; 1617 xadj_filtered[i] = cum; 1618 cum += t; 1619 } 1620 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1621 } else { 1622 xadj_filtered = NULL; 1623 adjncy_filtered = NULL; 1624 } 1625 1626 /* compute local connected components using PCBDDCGraph */ 1627 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1628 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1629 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1630 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1631 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1632 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1633 if (xadj_filtered) { 1634 graph->xadj = xadj_filtered; 1635 graph->adjncy = adjncy_filtered; 1636 } else { 1637 graph->xadj = xadj; 1638 graph->adjncy = adjncy; 1639 } 1640 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1641 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1642 /* partial clean up */ 1643 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1644 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1645 if (A != B) { 1646 ierr = MatDestroy(&B);CHKERRQ(ierr); 1647 } 1648 1649 /* get back data */ 1650 if (ncc) *ncc = graph->ncc; 1651 if (cc) { 1652 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1653 for (i=0;i<graph->ncc;i++) { 1654 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1655 } 1656 *cc = cc_n; 1657 } 1658 /* clean up graph */ 1659 graph->xadj = 0; 1660 graph->adjncy = 0; 1661 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "PCBDDCBenignCheck" 1667 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1668 { 1669 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1670 PC_IS* pcis = (PC_IS*)(pc->data); 1671 IS dirIS = NULL; 1672 PetscInt i; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1677 if (zerodiag) { 1678 Mat A; 1679 Vec vec3_N; 1680 PetscScalar *vals; 1681 const PetscInt *idxs; 1682 PetscInt nz,*count; 1683 1684 /* p0 */ 1685 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1686 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1687 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1688 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1689 for (i=0;i<nz;i++) vals[i] = 1.; 1690 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1691 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1692 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1693 /* v_I */ 1694 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1695 for (i=0;i<nz;i++) vals[i] = 0.; 1696 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1697 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1698 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1699 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1700 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1701 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1702 if (dirIS) { 1703 PetscInt n; 1704 1705 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1706 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1707 for (i=0;i<n;i++) vals[i] = 0.; 1708 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1709 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1710 } 1711 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1712 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1713 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1714 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1715 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1716 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1717 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1718 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1719 ierr = PetscFree(vals);CHKERRQ(ierr); 1720 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1721 1722 /* there should not be any pressure dofs lying on the interface */ 1723 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1724 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1725 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1726 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1727 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1728 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1729 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1730 ierr = PetscFree(count);CHKERRQ(ierr); 1731 } 1732 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1733 1734 /* check PCBDDCBenignGetOrSetP0 */ 1735 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1736 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1737 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1738 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1739 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1740 for (i=0;i<pcbddc->benign_n;i++) { 1741 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1742 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1743 } 1744 PetscFunctionReturn(0); 1745 } 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1749 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1750 { 1751 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1752 IS pressures,zerodiag,*zerodiag_subs; 1753 PetscInt nz,n; 1754 PetscInt *interior_dofs,n_interior_dofs; 1755 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1756 PetscErrorCode ierr; 1757 1758 PetscFunctionBegin; 1759 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1760 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1761 for (n=0;n<pcbddc->benign_n;n++) { 1762 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1763 } 1764 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1765 pcbddc->benign_n = 0; 1766 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1767 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1768 Checks if all the pressure dofs in each subdomain have a zero diagonal 1769 If not, a change of basis on pressures is not needed 1770 since the local Schur complements are already SPD 1771 */ 1772 has_null_pressures = PETSC_TRUE; 1773 have_null = PETSC_TRUE; 1774 if (pcbddc->n_ISForDofsLocal) { 1775 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1776 1777 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1778 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1779 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1780 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1781 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1782 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1783 if (!sorted) { 1784 ierr = ISSort(pressures);CHKERRQ(ierr); 1785 } 1786 } else { 1787 pressures = NULL; 1788 } 1789 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1790 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1791 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1792 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1793 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1794 if (!sorted) { 1795 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1796 } 1797 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1798 if (!nz) { 1799 if (n) have_null = PETSC_FALSE; 1800 has_null_pressures = PETSC_FALSE; 1801 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1802 } 1803 recompute_zerodiag = PETSC_FALSE; 1804 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1805 zerodiag_subs = NULL; 1806 pcbddc->benign_n = 0; 1807 n_interior_dofs = 0; 1808 interior_dofs = NULL; 1809 if (pcbddc->current_level) { /* need to compute interior nodes */ 1810 PetscInt n,i,j; 1811 PetscInt n_neigh,*neigh,*n_shared,**shared; 1812 PetscInt *iwork; 1813 1814 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1815 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1816 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1817 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1818 for (i=1;i<n_neigh;i++) 1819 for (j=0;j<n_shared[i];j++) 1820 iwork[shared[i][j]] += 1; 1821 for (i=0;i<n;i++) 1822 if (!iwork[i]) 1823 interior_dofs[n_interior_dofs++] = i; 1824 ierr = PetscFree(iwork);CHKERRQ(ierr); 1825 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1826 } 1827 if (has_null_pressures) { 1828 IS *subs; 1829 PetscInt nsubs,i,j,nl; 1830 const PetscInt *idxs; 1831 PetscScalar *array; 1832 Vec *work; 1833 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1834 1835 subs = pcbddc->local_subs; 1836 nsubs = pcbddc->n_local_subs; 1837 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1838 if (pcbddc->current_level) { 1839 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1840 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1841 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1842 /* work[0] = 1_p */ 1843 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1844 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1845 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1846 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1847 /* work[0] = 1_v */ 1848 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1849 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1850 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1851 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1852 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1853 } 1854 if (nsubs > 1) { 1855 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1856 for (i=0;i<nsubs;i++) { 1857 ISLocalToGlobalMapping l2g; 1858 IS t_zerodiag_subs; 1859 PetscInt nl; 1860 1861 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1862 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1863 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1864 if (nl) { 1865 PetscBool valid = PETSC_TRUE; 1866 1867 if (pcbddc->current_level) { 1868 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1869 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1870 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1871 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1872 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1873 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1874 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1875 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1876 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1877 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1878 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1879 for (j=0;j<n_interior_dofs;j++) { 1880 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1881 valid = PETSC_FALSE; 1882 break; 1883 } 1884 } 1885 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1886 } 1887 if (valid && pcbddc->NeumannBoundariesLocal) { 1888 IS t_bc; 1889 PetscInt nzb; 1890 1891 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1892 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1893 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1894 if (nzb) valid = PETSC_FALSE; 1895 } 1896 if (valid && pressures) { 1897 IS t_pressure_subs; 1898 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1899 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1900 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1901 } 1902 if (valid) { 1903 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1904 pcbddc->benign_n++; 1905 } else { 1906 recompute_zerodiag = PETSC_TRUE; 1907 } 1908 } 1909 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1910 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1911 } 1912 } else { /* there's just one subdomain (or zero if they have not been detected */ 1913 PetscBool valid = PETSC_TRUE; 1914 1915 if (pcbddc->NeumannBoundariesLocal) { 1916 PetscInt nzb; 1917 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1918 if (nzb) valid = PETSC_FALSE; 1919 } 1920 if (valid && pressures) { 1921 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1922 } 1923 if (valid && pcbddc->current_level) { 1924 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1925 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1926 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1927 for (j=0;j<n_interior_dofs;j++) { 1928 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1929 valid = PETSC_FALSE; 1930 break; 1931 } 1932 } 1933 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1934 } 1935 if (valid) { 1936 pcbddc->benign_n = 1; 1937 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1938 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1939 zerodiag_subs[0] = zerodiag; 1940 } 1941 } 1942 if (pcbddc->current_level) { 1943 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1944 } 1945 } 1946 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1947 1948 if (!pcbddc->benign_n) { 1949 PetscInt n; 1950 1951 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1952 recompute_zerodiag = PETSC_FALSE; 1953 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1954 if (n) { 1955 has_null_pressures = PETSC_FALSE; 1956 have_null = PETSC_FALSE; 1957 } 1958 } 1959 1960 /* final check for null pressures */ 1961 if (zerodiag && pressures) { 1962 PetscInt nz,np; 1963 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1964 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 1965 if (nz != np) have_null = PETSC_FALSE; 1966 } 1967 1968 if (recompute_zerodiag) { 1969 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1970 if (pcbddc->benign_n == 1) { 1971 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 1972 zerodiag = zerodiag_subs[0]; 1973 } else { 1974 PetscInt i,nzn,*new_idxs; 1975 1976 nzn = 0; 1977 for (i=0;i<pcbddc->benign_n;i++) { 1978 PetscInt ns; 1979 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1980 nzn += ns; 1981 } 1982 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 1983 nzn = 0; 1984 for (i=0;i<pcbddc->benign_n;i++) { 1985 PetscInt ns,*idxs; 1986 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1987 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1988 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 1989 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1990 nzn += ns; 1991 } 1992 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 1993 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 1994 } 1995 have_null = PETSC_FALSE; 1996 } 1997 1998 /* Prepare matrix to compute no-net-flux */ 1999 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2000 Mat A,loc_divudotp; 2001 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2002 IS row,col,isused = NULL; 2003 PetscInt M,N,n,st,n_isused; 2004 2005 if (pressures) { 2006 isused = pressures; 2007 } else { 2008 isused = zerodiag; 2009 } 2010 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2011 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2012 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2013 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2014 n_isused = 0; 2015 if (isused) { 2016 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2017 } 2018 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2019 st = st-n_isused; 2020 if (n) { 2021 const PetscInt *gidxs; 2022 2023 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2024 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2025 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2026 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2027 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2028 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2029 } else { 2030 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2031 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2032 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2033 } 2034 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2035 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2036 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2037 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2038 ierr = ISDestroy(&row);CHKERRQ(ierr); 2039 ierr = ISDestroy(&col);CHKERRQ(ierr); 2040 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2041 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2042 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2043 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2044 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2045 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2046 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2047 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2048 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2049 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2050 } 2051 2052 /* change of basis and p0 dofs */ 2053 if (has_null_pressures) { 2054 IS zerodiagc; 2055 const PetscInt *idxs,*idxsc; 2056 PetscInt i,s,*nnz; 2057 2058 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2059 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2060 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2061 /* local change of basis for pressures */ 2062 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2063 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2064 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2065 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2066 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2067 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2068 for (i=0;i<pcbddc->benign_n;i++) { 2069 PetscInt nzs,j; 2070 2071 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2072 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2073 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2074 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2075 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2076 } 2077 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2078 ierr = PetscFree(nnz);CHKERRQ(ierr); 2079 /* set identity on velocities */ 2080 for (i=0;i<n-nz;i++) { 2081 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2082 } 2083 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2084 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2085 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2086 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2087 /* set change on pressures */ 2088 for (s=0;s<pcbddc->benign_n;s++) { 2089 PetscScalar *array; 2090 PetscInt nzs; 2091 2092 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2093 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2094 for (i=0;i<nzs-1;i++) { 2095 PetscScalar vals[2]; 2096 PetscInt cols[2]; 2097 2098 cols[0] = idxs[i]; 2099 cols[1] = idxs[nzs-1]; 2100 vals[0] = 1.; 2101 vals[1] = 1.; 2102 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2103 } 2104 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2105 for (i=0;i<nzs-1;i++) array[i] = -1.; 2106 array[nzs-1] = 1.; 2107 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2108 /* store local idxs for p0 */ 2109 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2110 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2111 ierr = PetscFree(array);CHKERRQ(ierr); 2112 } 2113 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2114 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2115 /* project if needed */ 2116 if (pcbddc->benign_change_explicit) { 2117 Mat M; 2118 2119 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2120 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2121 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2122 ierr = MatDestroy(&M);CHKERRQ(ierr); 2123 } 2124 /* store global idxs for p0 */ 2125 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2126 } 2127 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2128 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2129 2130 /* determines if the coarse solver will be singular or not */ 2131 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2132 /* determines if the problem has subdomains with 0 pressure block */ 2133 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2134 *zerodiaglocal = zerodiag; 2135 PetscFunctionReturn(0); 2136 } 2137 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2140 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2141 { 2142 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2143 PetscScalar *array; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 if (!pcbddc->benign_sf) { 2148 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2149 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2150 } 2151 if (get) { 2152 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2153 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2154 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2155 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2156 } else { 2157 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2158 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2159 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2160 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2161 } 2162 PetscFunctionReturn(0); 2163 } 2164 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2167 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2168 { 2169 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 /* TODO: add error checking 2174 - avoid nested pop (or push) calls. 2175 - cannot push before pop. 2176 - cannot call this if pcbddc->local_mat is NULL 2177 */ 2178 if (!pcbddc->benign_n) { 2179 PetscFunctionReturn(0); 2180 } 2181 if (pop) { 2182 if (pcbddc->benign_change_explicit) { 2183 IS is_p0; 2184 MatReuse reuse; 2185 2186 /* extract B_0 */ 2187 reuse = MAT_INITIAL_MATRIX; 2188 if (pcbddc->benign_B0) { 2189 reuse = MAT_REUSE_MATRIX; 2190 } 2191 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2192 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2193 /* remove rows and cols from local problem */ 2194 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2195 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2196 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2197 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2198 } else { 2199 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2200 PetscScalar *vals; 2201 PetscInt i,n,*idxs_ins; 2202 2203 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2204 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2205 if (!pcbddc->benign_B0) { 2206 PetscInt *nnz; 2207 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2208 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2209 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2210 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2211 for (i=0;i<pcbddc->benign_n;i++) { 2212 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2213 nnz[i] = n - nnz[i]; 2214 } 2215 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2216 ierr = PetscFree(nnz);CHKERRQ(ierr); 2217 } 2218 2219 for (i=0;i<pcbddc->benign_n;i++) { 2220 PetscScalar *array; 2221 PetscInt *idxs,j,nz,cum; 2222 2223 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2224 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2225 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2226 for (j=0;j<nz;j++) vals[j] = 1.; 2227 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2228 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2229 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2230 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2231 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2232 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2233 cum = 0; 2234 for (j=0;j<n;j++) { 2235 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2236 vals[cum] = array[j]; 2237 idxs_ins[cum] = j; 2238 cum++; 2239 } 2240 } 2241 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2242 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2243 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2244 } 2245 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2246 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2247 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2248 } 2249 } else { /* push */ 2250 if (pcbddc->benign_change_explicit) { 2251 PetscInt i; 2252 2253 for (i=0;i<pcbddc->benign_n;i++) { 2254 PetscScalar *B0_vals; 2255 PetscInt *B0_cols,B0_ncol; 2256 2257 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2258 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2259 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2260 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2261 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2262 } 2263 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2264 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2265 } else { 2266 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2267 } 2268 } 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2274 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2275 { 2276 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2277 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2278 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2279 PetscBLASInt *B_iwork,*B_ifail; 2280 PetscScalar *work,lwork; 2281 PetscScalar *St,*S,*eigv; 2282 PetscScalar *Sarray,*Starray; 2283 PetscReal *eigs,thresh; 2284 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2285 PetscBool allocated_S_St; 2286 #if defined(PETSC_USE_COMPLEX) 2287 PetscReal *rwork; 2288 #endif 2289 PetscErrorCode ierr; 2290 2291 PetscFunctionBegin; 2292 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2293 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2294 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2295 2296 if (pcbddc->dbg_flag) { 2297 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2298 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2299 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2300 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2301 } 2302 2303 if (pcbddc->dbg_flag) { 2304 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2305 } 2306 2307 /* max size of subsets */ 2308 mss = 0; 2309 for (i=0;i<sub_schurs->n_subs;i++) { 2310 PetscInt subset_size; 2311 2312 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2313 mss = PetscMax(mss,subset_size); 2314 } 2315 2316 /* min/max and threshold */ 2317 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2318 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2319 nmax = PetscMax(nmin,nmax); 2320 allocated_S_St = PETSC_FALSE; 2321 if (nmin) { 2322 allocated_S_St = PETSC_TRUE; 2323 } 2324 2325 /* allocate lapack workspace */ 2326 cum = cum2 = 0; 2327 maxneigs = 0; 2328 for (i=0;i<sub_schurs->n_subs;i++) { 2329 PetscInt n,subset_size; 2330 2331 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2332 n = PetscMin(subset_size,nmax); 2333 cum += subset_size; 2334 cum2 += subset_size*n; 2335 maxneigs = PetscMax(maxneigs,n); 2336 } 2337 if (mss) { 2338 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2339 PetscBLASInt B_itype = 1; 2340 PetscBLASInt B_N = mss; 2341 PetscReal zero = 0.0; 2342 PetscReal eps = 0.0; /* dlamch? */ 2343 2344 B_lwork = -1; 2345 S = NULL; 2346 St = NULL; 2347 eigs = NULL; 2348 eigv = NULL; 2349 B_iwork = NULL; 2350 B_ifail = NULL; 2351 #if defined(PETSC_USE_COMPLEX) 2352 rwork = NULL; 2353 #endif 2354 thresh = 1.0; 2355 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2356 #if defined(PETSC_USE_COMPLEX) 2357 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2358 #else 2359 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2360 #endif 2361 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2362 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2363 } else { 2364 /* TODO */ 2365 } 2366 } else { 2367 lwork = 0; 2368 } 2369 2370 nv = 0; 2371 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2372 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2373 } 2374 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2375 if (allocated_S_St) { 2376 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2377 } 2378 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2379 #if defined(PETSC_USE_COMPLEX) 2380 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2381 #endif 2382 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2383 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2384 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2385 nv+cum,&pcbddc->adaptive_constraints_idxs, 2386 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2387 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2388 2389 maxneigs = 0; 2390 cum = cumarray = 0; 2391 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2392 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2393 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2394 const PetscInt *idxs; 2395 2396 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2397 for (cum=0;cum<nv;cum++) { 2398 pcbddc->adaptive_constraints_n[cum] = 1; 2399 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2400 pcbddc->adaptive_constraints_data[cum] = 1.0; 2401 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2402 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2403 } 2404 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2405 } 2406 2407 if (mss) { /* multilevel */ 2408 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2409 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2410 } 2411 2412 thresh = pcbddc->adaptive_threshold; 2413 for (i=0;i<sub_schurs->n_subs;i++) { 2414 const PetscInt *idxs; 2415 PetscReal upper,lower; 2416 PetscInt j,subset_size,eigs_start = 0; 2417 PetscBLASInt B_N; 2418 PetscBool same_data = PETSC_FALSE; 2419 2420 if (pcbddc->use_deluxe_scaling) { 2421 upper = PETSC_MAX_REAL; 2422 lower = thresh; 2423 } else { 2424 upper = 1./thresh; 2425 lower = 0.; 2426 } 2427 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2428 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2429 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2430 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2431 if (sub_schurs->is_hermitian) { 2432 PetscInt j,k; 2433 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2434 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2435 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2436 } 2437 for (j=0;j<subset_size;j++) { 2438 for (k=j;k<subset_size;k++) { 2439 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2440 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2441 } 2442 } 2443 } else { 2444 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2445 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2446 } 2447 } else { 2448 S = Sarray + cumarray; 2449 St = Starray + cumarray; 2450 } 2451 /* see if we can save some work */ 2452 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2453 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2454 } 2455 2456 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2457 B_neigs = 0; 2458 } else { 2459 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2460 PetscBLASInt B_itype = 1; 2461 PetscBLASInt B_IL, B_IU; 2462 PetscReal eps = -1.0; /* dlamch? */ 2463 PetscInt nmin_s; 2464 PetscBool compute_range = PETSC_FALSE; 2465 2466 if (pcbddc->dbg_flag) { 2467 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2468 } 2469 2470 compute_range = PETSC_FALSE; 2471 if (thresh > 1.+PETSC_SMALL && !same_data) { 2472 compute_range = PETSC_TRUE; 2473 } 2474 2475 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2476 if (compute_range) { 2477 2478 /* ask for eigenvalues larger than thresh */ 2479 #if defined(PETSC_USE_COMPLEX) 2480 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2481 #else 2482 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2483 #endif 2484 } else if (!same_data) { 2485 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2486 B_IL = 1; 2487 #if defined(PETSC_USE_COMPLEX) 2488 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2489 #else 2490 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2491 #endif 2492 } else { /* same_data is true, so get the adaptive function requested by the user */ 2493 PetscInt k; 2494 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2495 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2496 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2497 nmin = nmax; 2498 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2499 for (k=0;k<nmax;k++) { 2500 eigs[k] = 1./PETSC_SMALL; 2501 eigv[k*(subset_size+1)] = 1.0; 2502 } 2503 } 2504 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2505 if (B_ierr) { 2506 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2507 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2508 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2509 } 2510 2511 if (B_neigs > nmax) { 2512 if (pcbddc->dbg_flag) { 2513 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2514 } 2515 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2516 B_neigs = nmax; 2517 } 2518 2519 nmin_s = PetscMin(nmin,B_N); 2520 if (B_neigs < nmin_s) { 2521 PetscBLASInt B_neigs2; 2522 2523 if (pcbddc->use_deluxe_scaling) { 2524 B_IL = B_N - nmin_s + 1; 2525 B_IU = B_N - B_neigs; 2526 } else { 2527 B_IL = B_neigs + 1; 2528 B_IU = nmin_s; 2529 } 2530 if (pcbddc->dbg_flag) { 2531 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2532 } 2533 if (sub_schurs->is_hermitian) { 2534 PetscInt j,k; 2535 for (j=0;j<subset_size;j++) { 2536 for (k=j;k<subset_size;k++) { 2537 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2538 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2539 } 2540 } 2541 } else { 2542 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2543 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2544 } 2545 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2546 #if defined(PETSC_USE_COMPLEX) 2547 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2548 #else 2549 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2550 #endif 2551 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2552 B_neigs += B_neigs2; 2553 } 2554 if (B_ierr) { 2555 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2556 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2557 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2558 } 2559 if (pcbddc->dbg_flag) { 2560 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2561 for (j=0;j<B_neigs;j++) { 2562 if (eigs[j] == 0.0) { 2563 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2564 } else { 2565 if (pcbddc->use_deluxe_scaling) { 2566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2567 } else { 2568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2569 } 2570 } 2571 } 2572 } 2573 } else { 2574 /* TODO */ 2575 } 2576 } 2577 /* change the basis back to the original one */ 2578 if (sub_schurs->change) { 2579 Mat change,phi,phit; 2580 2581 if (pcbddc->dbg_flag > 1) { 2582 PetscInt ii; 2583 for (ii=0;ii<B_neigs;ii++) { 2584 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2585 for (j=0;j<B_N;j++) { 2586 #if defined(PETSC_USE_COMPLEX) 2587 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2588 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2590 #else 2591 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2592 #endif 2593 } 2594 } 2595 } 2596 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2597 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2598 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2599 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2600 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2601 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2602 } 2603 maxneigs = PetscMax(B_neigs,maxneigs); 2604 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2605 if (B_neigs) { 2606 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2607 2608 if (pcbddc->dbg_flag > 1) { 2609 PetscInt ii; 2610 for (ii=0;ii<B_neigs;ii++) { 2611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2612 for (j=0;j<B_N;j++) { 2613 #if defined(PETSC_USE_COMPLEX) 2614 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2615 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2617 #else 2618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2619 #endif 2620 } 2621 } 2622 } 2623 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2624 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2625 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2626 cum++; 2627 } 2628 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2629 /* shift for next computation */ 2630 cumarray += subset_size*subset_size; 2631 } 2632 if (pcbddc->dbg_flag) { 2633 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2634 } 2635 2636 if (mss) { 2637 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2638 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2639 /* destroy matrices (junk) */ 2640 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2641 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2642 } 2643 if (allocated_S_St) { 2644 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2645 } 2646 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2647 #if defined(PETSC_USE_COMPLEX) 2648 ierr = PetscFree(rwork);CHKERRQ(ierr); 2649 #endif 2650 if (pcbddc->dbg_flag) { 2651 PetscInt maxneigs_r; 2652 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2653 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2654 } 2655 PetscFunctionReturn(0); 2656 } 2657 2658 #undef __FUNCT__ 2659 #define __FUNCT__ "PCBDDCSetUpSolvers" 2660 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2661 { 2662 PetscScalar *coarse_submat_vals; 2663 PetscErrorCode ierr; 2664 2665 PetscFunctionBegin; 2666 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2667 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2668 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2669 2670 /* Setup local neumann solver ksp_R */ 2671 /* PCBDDCSetUpLocalScatters should be called first! */ 2672 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2673 2674 /* 2675 Setup local correction and local part of coarse basis. 2676 Gives back the dense local part of the coarse matrix in column major ordering 2677 */ 2678 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2679 2680 /* Compute total number of coarse nodes and setup coarse solver */ 2681 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2682 2683 /* free */ 2684 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2685 PetscFunctionReturn(0); 2686 } 2687 2688 #undef __FUNCT__ 2689 #define __FUNCT__ "PCBDDCResetCustomization" 2690 PetscErrorCode PCBDDCResetCustomization(PC pc) 2691 { 2692 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2693 PetscErrorCode ierr; 2694 2695 PetscFunctionBegin; 2696 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2697 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2698 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2699 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2700 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2701 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2702 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2703 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2704 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2705 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2706 PetscFunctionReturn(0); 2707 } 2708 2709 #undef __FUNCT__ 2710 #define __FUNCT__ "PCBDDCResetTopography" 2711 PetscErrorCode PCBDDCResetTopography(PC pc) 2712 { 2713 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2714 PetscInt i; 2715 PetscErrorCode ierr; 2716 2717 PetscFunctionBegin; 2718 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2719 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2720 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2721 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2722 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2723 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2724 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2725 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2726 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2727 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2728 pcbddc->graphanalyzed = PETSC_FALSE; 2729 for (i=0;i<pcbddc->n_local_subs;i++) { 2730 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2731 } 2732 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2733 if (pcbddc->sub_schurs) { 2734 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2735 } 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "PCBDDCResetSolvers" 2741 PetscErrorCode PCBDDCResetSolvers(PC pc) 2742 { 2743 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2748 if (pcbddc->coarse_phi_B) { 2749 PetscScalar *array; 2750 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2751 ierr = PetscFree(array);CHKERRQ(ierr); 2752 } 2753 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2754 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2755 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2756 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2757 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2758 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2759 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2760 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2761 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2762 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2763 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2764 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2765 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2766 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2767 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2768 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2769 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2770 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2771 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2772 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2773 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2774 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2775 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2776 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2777 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2778 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2779 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2780 if (pcbddc->benign_zerodiag_subs) { 2781 PetscInt i; 2782 for (i=0;i<pcbddc->benign_n;i++) { 2783 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2784 } 2785 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2786 } 2787 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2788 PetscFunctionReturn(0); 2789 } 2790 2791 #undef __FUNCT__ 2792 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2793 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2794 { 2795 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2796 PC_IS *pcis = (PC_IS*)pc->data; 2797 VecType impVecType; 2798 PetscInt n_constraints,n_R,old_size; 2799 PetscErrorCode ierr; 2800 2801 PetscFunctionBegin; 2802 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2803 /* get sizes */ 2804 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2805 n_R = pcis->n - pcbddc->n_vertices; 2806 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2807 /* local work vectors (try to avoid unneeded work)*/ 2808 /* R nodes */ 2809 old_size = -1; 2810 if (pcbddc->vec1_R) { 2811 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2812 } 2813 if (n_R != old_size) { 2814 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2815 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2816 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2817 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2818 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2819 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2820 } 2821 /* local primal dofs */ 2822 old_size = -1; 2823 if (pcbddc->vec1_P) { 2824 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2825 } 2826 if (pcbddc->local_primal_size != old_size) { 2827 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2828 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2829 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2830 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2831 } 2832 /* local explicit constraints */ 2833 old_size = -1; 2834 if (pcbddc->vec1_C) { 2835 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2836 } 2837 if (n_constraints && n_constraints != old_size) { 2838 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2839 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2840 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2841 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2842 } 2843 PetscFunctionReturn(0); 2844 } 2845 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "PCBDDCSetUpCorrection" 2848 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2849 { 2850 PetscErrorCode ierr; 2851 /* pointers to pcis and pcbddc */ 2852 PC_IS* pcis = (PC_IS*)pc->data; 2853 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2854 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2855 /* submatrices of local problem */ 2856 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2857 /* submatrices of local coarse problem */ 2858 Mat S_VV,S_CV,S_VC,S_CC; 2859 /* working matrices */ 2860 Mat C_CR; 2861 /* additional working stuff */ 2862 PC pc_R; 2863 Mat F; 2864 Vec dummy_vec; 2865 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2866 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2867 PetscScalar *work; 2868 PetscInt *idx_V_B; 2869 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2870 PetscInt i,n_R,n_D,n_B; 2871 2872 /* some shortcuts to scalars */ 2873 PetscScalar one=1.0,m_one=-1.0; 2874 2875 PetscFunctionBegin; 2876 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2877 2878 /* Set Non-overlapping dimensions */ 2879 n_vertices = pcbddc->n_vertices; 2880 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2881 n_B = pcis->n_B; 2882 n_D = pcis->n - n_B; 2883 n_R = pcis->n - n_vertices; 2884 2885 /* vertices in boundary numbering */ 2886 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2887 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2888 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2889 2890 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2891 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2892 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2893 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2894 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2895 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2896 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2897 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2898 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2899 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2900 2901 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2902 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2903 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2904 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2905 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2906 lda_rhs = n_R; 2907 need_benign_correction = PETSC_FALSE; 2908 if (isLU || isILU || isCHOL) { 2909 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2910 } else if (sub_schurs && sub_schurs->reuse_solver) { 2911 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2912 MatFactorType type; 2913 2914 F = reuse_solver->F; 2915 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2916 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2917 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2918 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2919 } else { 2920 F = NULL; 2921 } 2922 2923 /* allocate workspace */ 2924 n = 0; 2925 if (n_constraints) { 2926 n += lda_rhs*n_constraints; 2927 } 2928 if (n_vertices) { 2929 n = PetscMax(2*lda_rhs*n_vertices,n); 2930 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2931 } 2932 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2933 2934 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2935 dummy_vec = NULL; 2936 if (need_benign_correction && lda_rhs != n_R && F) { 2937 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2938 } 2939 2940 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2941 if (n_constraints) { 2942 Mat M1,M2,M3,C_B; 2943 IS is_aux; 2944 PetscScalar *array,*array2; 2945 2946 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2947 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2948 2949 /* Extract constraints on R nodes: C_{CR} */ 2950 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2951 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2952 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2953 2954 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2955 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2956 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2957 for (i=0;i<n_constraints;i++) { 2958 const PetscScalar *row_cmat_values; 2959 const PetscInt *row_cmat_indices; 2960 PetscInt size_of_constraint,j; 2961 2962 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2963 for (j=0;j<size_of_constraint;j++) { 2964 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 2965 } 2966 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2967 } 2968 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 2969 if (F) { 2970 Mat B; 2971 2972 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2973 if (need_benign_correction) { 2974 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2975 2976 /* rhs is already zero on interior dofs, no need to change the rhs */ 2977 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 2978 } 2979 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 2980 if (need_benign_correction) { 2981 PetscScalar *marr; 2982 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2983 2984 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2985 if (lda_rhs != n_R) { 2986 for (i=0;i<n_constraints;i++) { 2987 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2988 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2989 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2990 } 2991 } else { 2992 for (i=0;i<n_constraints;i++) { 2993 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2994 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2995 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2996 } 2997 } 2998 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2999 } 3000 ierr = MatDestroy(&B);CHKERRQ(ierr); 3001 } else { 3002 PetscScalar *marr; 3003 3004 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3005 for (i=0;i<n_constraints;i++) { 3006 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3007 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3008 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3009 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3010 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3011 } 3012 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3013 } 3014 if (!pcbddc->switch_static) { 3015 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3016 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3017 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3018 for (i=0;i<n_constraints;i++) { 3019 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3020 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3021 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3022 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3023 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3024 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3025 } 3026 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3027 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3028 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3029 } else { 3030 if (lda_rhs != n_R) { 3031 IS dummy; 3032 3033 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3034 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3035 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3036 } else { 3037 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3038 pcbddc->local_auxmat2 = local_auxmat2_R; 3039 } 3040 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3041 } 3042 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3043 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3044 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3045 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3046 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3047 if (isCHOL) { 3048 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3049 } else { 3050 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3051 } 3052 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3053 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3054 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3055 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3056 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3057 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3058 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3059 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3060 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3061 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3062 } 3063 3064 /* Get submatrices from subdomain matrix */ 3065 if (n_vertices) { 3066 IS is_aux; 3067 3068 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3069 IS tis; 3070 3071 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3072 ierr = ISSort(tis);CHKERRQ(ierr); 3073 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3074 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3075 } else { 3076 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3077 } 3078 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3079 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3080 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3081 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3082 } 3083 3084 /* Matrix of coarse basis functions (local) */ 3085 if (pcbddc->coarse_phi_B) { 3086 PetscInt on_B,on_primal,on_D=n_D; 3087 if (pcbddc->coarse_phi_D) { 3088 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3089 } 3090 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3091 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3092 PetscScalar *marray; 3093 3094 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3095 ierr = PetscFree(marray);CHKERRQ(ierr); 3096 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3097 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3098 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3099 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3100 } 3101 } 3102 3103 if (!pcbddc->coarse_phi_B) { 3104 PetscScalar *marray; 3105 3106 n = n_B*pcbddc->local_primal_size; 3107 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3108 n += n_D*pcbddc->local_primal_size; 3109 } 3110 if (!pcbddc->symmetric_primal) { 3111 n *= 2; 3112 } 3113 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3114 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3115 n = n_B*pcbddc->local_primal_size; 3116 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3117 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3118 n += n_D*pcbddc->local_primal_size; 3119 } 3120 if (!pcbddc->symmetric_primal) { 3121 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3122 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3123 n = n_B*pcbddc->local_primal_size; 3124 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3125 } 3126 } else { 3127 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3128 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3129 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3130 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3131 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3132 } 3133 } 3134 } 3135 3136 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3137 p0_lidx_I = NULL; 3138 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3139 const PetscInt *idxs; 3140 3141 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3142 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3143 for (i=0;i<pcbddc->benign_n;i++) { 3144 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3145 } 3146 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3147 } 3148 3149 /* vertices */ 3150 if (n_vertices) { 3151 3152 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3153 3154 if (n_R) { 3155 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3156 PetscBLASInt B_N,B_one = 1; 3157 PetscScalar *x,*y; 3158 PetscBool isseqaij; 3159 3160 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3161 if (need_benign_correction) { 3162 ISLocalToGlobalMapping RtoN; 3163 IS is_p0; 3164 PetscInt *idxs_p0,n; 3165 3166 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3167 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3168 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3169 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3170 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3171 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3172 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3173 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3174 } 3175 3176 if (lda_rhs == n_R) { 3177 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3178 } else { 3179 PetscScalar *av,*array; 3180 const PetscInt *xadj,*adjncy; 3181 PetscInt n; 3182 PetscBool flg_row; 3183 3184 array = work+lda_rhs*n_vertices; 3185 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3186 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3187 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3188 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3189 for (i=0;i<n;i++) { 3190 PetscInt j; 3191 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3192 } 3193 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3194 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3195 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3196 } 3197 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3198 if (need_benign_correction) { 3199 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3200 PetscScalar *marr; 3201 3202 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3203 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3204 3205 | 0 0 0 | (V) 3206 L = | 0 0 -1 | (P-p0) 3207 | 0 0 -1 | (p0) 3208 3209 */ 3210 for (i=0;i<reuse_solver->benign_n;i++) { 3211 const PetscScalar *vals; 3212 const PetscInt *idxs,*idxs_zero; 3213 PetscInt n,j,nz; 3214 3215 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3216 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3217 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3218 for (j=0;j<n;j++) { 3219 PetscScalar val = vals[j]; 3220 PetscInt k,col = idxs[j]; 3221 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3222 } 3223 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3224 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3225 } 3226 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3227 } 3228 if (F) { 3229 /* need to correct the rhs */ 3230 if (need_benign_correction) { 3231 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3232 PetscScalar *marr; 3233 3234 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3235 if (lda_rhs != n_R) { 3236 for (i=0;i<n_vertices;i++) { 3237 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3238 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3239 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3240 } 3241 } else { 3242 for (i=0;i<n_vertices;i++) { 3243 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3244 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3245 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3246 } 3247 } 3248 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3249 } 3250 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3251 /* need to correct the solution */ 3252 if (need_benign_correction) { 3253 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3254 PetscScalar *marr; 3255 3256 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3257 if (lda_rhs != n_R) { 3258 for (i=0;i<n_vertices;i++) { 3259 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3260 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3261 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3262 } 3263 } else { 3264 for (i=0;i<n_vertices;i++) { 3265 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3266 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3267 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3268 } 3269 } 3270 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3271 } 3272 } else { 3273 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3274 for (i=0;i<n_vertices;i++) { 3275 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3276 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3277 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3278 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3279 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3280 } 3281 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3282 } 3283 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3284 /* S_VV and S_CV */ 3285 if (n_constraints) { 3286 Mat B; 3287 3288 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3289 for (i=0;i<n_vertices;i++) { 3290 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3291 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3292 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3293 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3294 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3295 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3296 } 3297 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3298 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3299 ierr = MatDestroy(&B);CHKERRQ(ierr); 3300 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3301 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3302 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3303 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3304 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3305 ierr = MatDestroy(&B);CHKERRQ(ierr); 3306 } 3307 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3308 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3309 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3310 } 3311 if (lda_rhs != n_R) { 3312 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3313 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3314 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3315 } 3316 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3317 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3318 if (need_benign_correction) { 3319 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3320 PetscScalar *marr,*sums; 3321 3322 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3323 ierr = MatDenseGetArray(S_VVt,&marr); 3324 for (i=0;i<reuse_solver->benign_n;i++) { 3325 const PetscScalar *vals; 3326 const PetscInt *idxs,*idxs_zero; 3327 PetscInt n,j,nz; 3328 3329 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3330 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3331 for (j=0;j<n_vertices;j++) { 3332 PetscInt k; 3333 sums[j] = 0.; 3334 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3335 } 3336 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3337 for (j=0;j<n;j++) { 3338 PetscScalar val = vals[j]; 3339 PetscInt k; 3340 for (k=0;k<n_vertices;k++) { 3341 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3342 } 3343 } 3344 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3345 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3346 } 3347 ierr = PetscFree(sums);CHKERRQ(ierr); 3348 ierr = MatDenseRestoreArray(S_VVt,&marr); 3349 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3350 } 3351 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3352 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3353 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3354 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3355 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3356 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3357 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3358 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3359 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3360 } else { 3361 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3362 } 3363 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3364 3365 /* coarse basis functions */ 3366 for (i=0;i<n_vertices;i++) { 3367 PetscScalar *y; 3368 3369 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3370 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3371 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3372 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3373 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3374 y[n_B*i+idx_V_B[i]] = 1.0; 3375 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3376 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3377 3378 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3379 PetscInt j; 3380 3381 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3382 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3383 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3384 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3385 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3386 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3387 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3388 } 3389 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3390 } 3391 /* if n_R == 0 the object is not destroyed */ 3392 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3393 } 3394 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3395 3396 if (n_constraints) { 3397 Mat B; 3398 3399 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3400 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3401 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3402 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3403 if (n_vertices) { 3404 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3405 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3406 } else { 3407 Mat S_VCt; 3408 3409 if (lda_rhs != n_R) { 3410 ierr = MatDestroy(&B);CHKERRQ(ierr); 3411 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3412 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3413 } 3414 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3415 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3416 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3417 } 3418 } 3419 ierr = MatDestroy(&B);CHKERRQ(ierr); 3420 /* coarse basis functions */ 3421 for (i=0;i<n_constraints;i++) { 3422 PetscScalar *y; 3423 3424 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3425 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3426 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3427 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3428 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3429 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3430 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3431 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3432 PetscInt j; 3433 3434 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3435 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3436 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3437 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3438 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3439 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3440 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3441 } 3442 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3443 } 3444 } 3445 if (n_constraints) { 3446 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3447 } 3448 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3449 3450 /* coarse matrix entries relative to B_0 */ 3451 if (pcbddc->benign_n) { 3452 Mat B0_B,B0_BPHI; 3453 IS is_dummy; 3454 PetscScalar *data; 3455 PetscInt j; 3456 3457 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3458 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3459 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3460 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3461 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3462 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3463 for (j=0;j<pcbddc->benign_n;j++) { 3464 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3465 for (i=0;i<pcbddc->local_primal_size;i++) { 3466 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3467 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3468 } 3469 } 3470 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3471 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3472 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3473 } 3474 3475 /* compute other basis functions for non-symmetric problems */ 3476 if (!pcbddc->symmetric_primal) { 3477 Mat B_V=NULL,B_C=NULL; 3478 PetscScalar *marray; 3479 3480 if (n_constraints) { 3481 Mat S_CCT,C_CRT; 3482 3483 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3484 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3485 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3486 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3487 if (n_vertices) { 3488 Mat S_VCT; 3489 3490 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3491 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3492 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3493 } 3494 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3495 } else { 3496 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3497 } 3498 if (n_vertices && n_R) { 3499 PetscScalar *av,*marray; 3500 const PetscInt *xadj,*adjncy; 3501 PetscInt n; 3502 PetscBool flg_row; 3503 3504 /* B_V = B_V - A_VR^T */ 3505 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3506 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3507 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3508 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3509 for (i=0;i<n;i++) { 3510 PetscInt j; 3511 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3512 } 3513 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3514 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3515 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3516 } 3517 3518 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3519 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3520 for (i=0;i<n_vertices;i++) { 3521 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3522 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3523 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3524 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3525 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3526 } 3527 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3528 if (B_C) { 3529 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3530 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3531 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3532 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3533 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3534 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3535 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3536 } 3537 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3538 } 3539 /* coarse basis functions */ 3540 for (i=0;i<pcbddc->local_primal_size;i++) { 3541 PetscScalar *y; 3542 3543 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3544 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3545 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3546 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3547 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3548 if (i<n_vertices) { 3549 y[n_B*i+idx_V_B[i]] = 1.0; 3550 } 3551 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3552 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3553 3554 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3555 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3556 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3557 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3558 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3559 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3560 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3561 } 3562 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3563 } 3564 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3565 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3566 } 3567 /* free memory */ 3568 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3569 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3570 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3571 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3572 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3573 ierr = PetscFree(work);CHKERRQ(ierr); 3574 if (n_vertices) { 3575 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3576 } 3577 if (n_constraints) { 3578 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3579 } 3580 /* Checking coarse_sub_mat and coarse basis functios */ 3581 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3582 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3583 if (pcbddc->dbg_flag) { 3584 Mat coarse_sub_mat; 3585 Mat AUXMAT,TM1,TM2,TM3,TM4; 3586 Mat coarse_phi_D,coarse_phi_B; 3587 Mat coarse_psi_D,coarse_psi_B; 3588 Mat A_II,A_BB,A_IB,A_BI; 3589 Mat C_B,CPHI; 3590 IS is_dummy; 3591 Vec mones; 3592 MatType checkmattype=MATSEQAIJ; 3593 PetscReal real_value; 3594 3595 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3596 Mat A; 3597 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3598 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3599 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3600 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3601 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3602 ierr = MatDestroy(&A);CHKERRQ(ierr); 3603 } else { 3604 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3605 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3606 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3607 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3608 } 3609 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3610 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3611 if (!pcbddc->symmetric_primal) { 3612 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3613 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3614 } 3615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3616 3617 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3619 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3620 if (!pcbddc->symmetric_primal) { 3621 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3622 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3623 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3624 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3625 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3626 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3627 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3628 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3629 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3630 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3631 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3632 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3633 } else { 3634 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3635 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3636 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3637 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3638 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3639 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3640 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3641 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3642 } 3643 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3644 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3645 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3646 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3647 if (pcbddc->benign_n) { 3648 Mat B0_B,B0_BPHI; 3649 PetscScalar *data,*data2; 3650 PetscInt j; 3651 3652 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3653 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3654 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3655 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3656 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3657 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3658 for (j=0;j<pcbddc->benign_n;j++) { 3659 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3660 for (i=0;i<pcbddc->local_primal_size;i++) { 3661 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3662 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3663 } 3664 } 3665 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3666 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3667 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3668 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3669 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3670 } 3671 #if 0 3672 { 3673 PetscViewer viewer; 3674 char filename[256]; 3675 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3676 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3677 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3678 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3679 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3680 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3681 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3682 if (save_change) { 3683 Mat phi_B; 3684 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3685 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3686 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3687 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3688 } else { 3689 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3690 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3691 } 3692 if (pcbddc->coarse_phi_D) { 3693 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3694 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3695 } 3696 if (pcbddc->coarse_psi_B) { 3697 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3698 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3699 } 3700 if (pcbddc->coarse_psi_D) { 3701 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3702 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3703 } 3704 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3705 } 3706 #endif 3707 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3708 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3709 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3710 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3711 3712 /* check constraints */ 3713 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3714 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3715 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3716 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3717 } else { 3718 PetscScalar *data; 3719 Mat tmat; 3720 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3721 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3722 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3723 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3724 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3725 } 3726 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3727 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3728 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3729 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3730 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3731 if (!pcbddc->symmetric_primal) { 3732 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3733 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3734 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3735 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3736 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3737 } 3738 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3739 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3740 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3741 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3742 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3743 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3744 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3745 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3746 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3747 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3748 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3749 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3750 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3751 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3752 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3753 if (!pcbddc->symmetric_primal) { 3754 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3755 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3756 } 3757 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3758 } 3759 /* get back data */ 3760 *coarse_submat_vals_n = coarse_submat_vals; 3761 PetscFunctionReturn(0); 3762 } 3763 3764 #undef __FUNCT__ 3765 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3766 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3767 { 3768 Mat *work_mat; 3769 IS isrow_s,iscol_s; 3770 PetscBool rsorted,csorted; 3771 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3772 PetscErrorCode ierr; 3773 3774 PetscFunctionBegin; 3775 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3776 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3777 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3778 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3779 3780 if (!rsorted) { 3781 const PetscInt *idxs; 3782 PetscInt *idxs_sorted,i; 3783 3784 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3785 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3786 for (i=0;i<rsize;i++) { 3787 idxs_perm_r[i] = i; 3788 } 3789 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3790 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3791 for (i=0;i<rsize;i++) { 3792 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3793 } 3794 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3795 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3796 } else { 3797 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3798 isrow_s = isrow; 3799 } 3800 3801 if (!csorted) { 3802 if (isrow == iscol) { 3803 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3804 iscol_s = isrow_s; 3805 } else { 3806 const PetscInt *idxs; 3807 PetscInt *idxs_sorted,i; 3808 3809 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3810 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3811 for (i=0;i<csize;i++) { 3812 idxs_perm_c[i] = i; 3813 } 3814 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3815 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3816 for (i=0;i<csize;i++) { 3817 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3818 } 3819 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3820 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3821 } 3822 } else { 3823 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3824 iscol_s = iscol; 3825 } 3826 3827 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3828 3829 if (!rsorted || !csorted) { 3830 Mat new_mat; 3831 IS is_perm_r,is_perm_c; 3832 3833 if (!rsorted) { 3834 PetscInt *idxs_r,i; 3835 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3836 for (i=0;i<rsize;i++) { 3837 idxs_r[idxs_perm_r[i]] = i; 3838 } 3839 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3840 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3841 } else { 3842 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3843 } 3844 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3845 3846 if (!csorted) { 3847 if (isrow_s == iscol_s) { 3848 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3849 is_perm_c = is_perm_r; 3850 } else { 3851 PetscInt *idxs_c,i; 3852 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3853 for (i=0;i<csize;i++) { 3854 idxs_c[idxs_perm_c[i]] = i; 3855 } 3856 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3857 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3858 } 3859 } else { 3860 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3861 } 3862 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3863 3864 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3865 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3866 work_mat[0] = new_mat; 3867 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3868 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3869 } 3870 3871 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3872 *B = work_mat[0]; 3873 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3874 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3875 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3876 PetscFunctionReturn(0); 3877 } 3878 3879 #undef __FUNCT__ 3880 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3881 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3882 { 3883 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3884 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3885 Mat new_mat; 3886 IS is_local,is_global; 3887 PetscInt local_size; 3888 PetscBool isseqaij; 3889 PetscErrorCode ierr; 3890 3891 PetscFunctionBegin; 3892 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3893 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3894 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3895 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3896 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3897 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3898 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3899 3900 /* check */ 3901 if (pcbddc->dbg_flag) { 3902 Vec x,x_change; 3903 PetscReal error; 3904 3905 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3906 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3907 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3908 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3909 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3910 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3911 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3912 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3913 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3914 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3915 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3916 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3917 ierr = VecDestroy(&x);CHKERRQ(ierr); 3918 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3919 } 3920 3921 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3922 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3923 if (isseqaij) { 3924 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3925 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3926 } else { 3927 Mat work_mat; 3928 3929 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3930 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3931 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3932 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3933 } 3934 if (matis->A->symmetric_set) { 3935 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3936 #if !defined(PETSC_USE_COMPLEX) 3937 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3938 #endif 3939 } 3940 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3941 PetscFunctionReturn(0); 3942 } 3943 3944 #undef __FUNCT__ 3945 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3946 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3947 { 3948 PC_IS* pcis = (PC_IS*)(pc->data); 3949 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3950 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3951 PetscInt *idx_R_local=NULL; 3952 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3953 PetscInt vbs,bs; 3954 PetscBT bitmask=NULL; 3955 PetscErrorCode ierr; 3956 3957 PetscFunctionBegin; 3958 /* 3959 No need to setup local scatters if 3960 - primal space is unchanged 3961 AND 3962 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 3963 AND 3964 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 3965 */ 3966 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 3967 PetscFunctionReturn(0); 3968 } 3969 /* destroy old objects */ 3970 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3971 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3972 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3973 /* Set Non-overlapping dimensions */ 3974 n_B = pcis->n_B; 3975 n_D = pcis->n - n_B; 3976 n_vertices = pcbddc->n_vertices; 3977 3978 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 3979 3980 /* create auxiliary bitmask and allocate workspace */ 3981 if (!sub_schurs || !sub_schurs->reuse_solver) { 3982 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 3983 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 3984 for (i=0;i<n_vertices;i++) { 3985 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 3986 } 3987 3988 for (i=0, n_R=0; i<pcis->n; i++) { 3989 if (!PetscBTLookup(bitmask,i)) { 3990 idx_R_local[n_R++] = i; 3991 } 3992 } 3993 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 3994 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3995 3996 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3997 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 3998 } 3999 4000 /* Block code */ 4001 vbs = 1; 4002 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4003 if (bs>1 && !(n_vertices%bs)) { 4004 PetscBool is_blocked = PETSC_TRUE; 4005 PetscInt *vary; 4006 if (!sub_schurs || !sub_schurs->reuse_solver) { 4007 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4008 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4009 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4010 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4011 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4012 for (i=0; i<pcis->n/bs; i++) { 4013 if (vary[i]!=0 && vary[i]!=bs) { 4014 is_blocked = PETSC_FALSE; 4015 break; 4016 } 4017 } 4018 ierr = PetscFree(vary);CHKERRQ(ierr); 4019 } else { 4020 /* Verify directly the R set */ 4021 for (i=0; i<n_R/bs; i++) { 4022 PetscInt j,node=idx_R_local[bs*i]; 4023 for (j=1; j<bs; j++) { 4024 if (node != idx_R_local[bs*i+j]-j) { 4025 is_blocked = PETSC_FALSE; 4026 break; 4027 } 4028 } 4029 } 4030 } 4031 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4032 vbs = bs; 4033 for (i=0;i<n_R/vbs;i++) { 4034 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4035 } 4036 } 4037 } 4038 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4039 if (sub_schurs && sub_schurs->reuse_solver) { 4040 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4041 4042 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4043 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4044 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4045 reuse_solver->is_R = pcbddc->is_R_local; 4046 } else { 4047 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4048 } 4049 4050 /* print some info if requested */ 4051 if (pcbddc->dbg_flag) { 4052 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4053 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4054 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4056 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4057 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4058 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4059 } 4060 4061 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4062 if (!sub_schurs || !sub_schurs->reuse_solver) { 4063 IS is_aux1,is_aux2; 4064 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4065 4066 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4067 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4068 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4069 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4070 for (i=0; i<n_D; i++) { 4071 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4072 } 4073 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4074 for (i=0, j=0; i<n_R; i++) { 4075 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4076 aux_array1[j++] = i; 4077 } 4078 } 4079 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4080 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4081 for (i=0, j=0; i<n_B; i++) { 4082 if (!PetscBTLookup(bitmask,is_indices[i])) { 4083 aux_array2[j++] = i; 4084 } 4085 } 4086 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4087 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4088 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4089 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4090 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4091 4092 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4093 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4094 for (i=0, j=0; i<n_R; i++) { 4095 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4096 aux_array1[j++] = i; 4097 } 4098 } 4099 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4100 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4101 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4102 } 4103 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4104 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4105 } else { 4106 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4107 IS tis; 4108 PetscInt schur_size; 4109 4110 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4111 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4112 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4113 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4114 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4115 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4116 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4117 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4118 } 4119 } 4120 PetscFunctionReturn(0); 4121 } 4122 4123 4124 #undef __FUNCT__ 4125 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4126 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4127 { 4128 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4129 PC_IS *pcis = (PC_IS*)pc->data; 4130 PC pc_temp; 4131 Mat A_RR; 4132 MatReuse reuse; 4133 PetscScalar m_one = -1.0; 4134 PetscReal value; 4135 PetscInt n_D,n_R; 4136 PetscBool check_corr[2],issbaij; 4137 PetscErrorCode ierr; 4138 /* prefixes stuff */ 4139 char dir_prefix[256],neu_prefix[256],str_level[16]; 4140 size_t len; 4141 4142 PetscFunctionBegin; 4143 4144 /* compute prefixes */ 4145 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4146 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4147 if (!pcbddc->current_level) { 4148 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4149 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4150 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4151 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4152 } else { 4153 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4154 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4155 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4156 len -= 15; /* remove "pc_bddc_coarse_" */ 4157 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4158 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4159 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4160 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4161 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4162 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4163 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4164 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4165 } 4166 4167 /* DIRICHLET PROBLEM */ 4168 if (dirichlet) { 4169 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4170 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4171 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4172 if (pcbddc->dbg_flag) { 4173 Mat A_IIn; 4174 4175 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4176 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4177 pcis->A_II = A_IIn; 4178 } 4179 } 4180 if (pcbddc->local_mat->symmetric_set) { 4181 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4182 } 4183 /* Matrix for Dirichlet problem is pcis->A_II */ 4184 n_D = pcis->n - pcis->n_B; 4185 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4186 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4187 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4188 /* default */ 4189 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4190 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4191 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4192 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4193 if (issbaij) { 4194 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4195 } else { 4196 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4197 } 4198 /* Allow user's customization */ 4199 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4200 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4201 } 4202 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4203 if (sub_schurs && sub_schurs->reuse_solver) { 4204 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4205 4206 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4207 } 4208 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4209 if (!n_D) { 4210 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4211 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4212 } 4213 /* Set Up KSP for Dirichlet problem of BDDC */ 4214 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4215 /* set ksp_D into pcis data */ 4216 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4217 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4218 pcis->ksp_D = pcbddc->ksp_D; 4219 } 4220 4221 /* NEUMANN PROBLEM */ 4222 A_RR = 0; 4223 if (neumann) { 4224 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4225 PetscInt ibs,mbs; 4226 PetscBool issbaij; 4227 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4228 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4229 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4230 if (pcbddc->ksp_R) { /* already created ksp */ 4231 PetscInt nn_R; 4232 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4233 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4234 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4235 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4236 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4237 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4238 reuse = MAT_INITIAL_MATRIX; 4239 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4240 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4241 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4242 reuse = MAT_INITIAL_MATRIX; 4243 } else { /* safe to reuse the matrix */ 4244 reuse = MAT_REUSE_MATRIX; 4245 } 4246 } 4247 /* last check */ 4248 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4249 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4250 reuse = MAT_INITIAL_MATRIX; 4251 } 4252 } else { /* first time, so we need to create the matrix */ 4253 reuse = MAT_INITIAL_MATRIX; 4254 } 4255 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4256 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4257 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4258 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4259 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4260 if (matis->A == pcbddc->local_mat) { 4261 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4262 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4263 } else { 4264 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4265 } 4266 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4267 if (matis->A == pcbddc->local_mat) { 4268 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4269 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4270 } else { 4271 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4272 } 4273 } 4274 /* extract A_RR */ 4275 if (sub_schurs && sub_schurs->reuse_solver) { 4276 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4277 4278 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4279 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4280 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4281 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4282 } else { 4283 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4284 } 4285 } else { 4286 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4287 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4288 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4289 } 4290 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4291 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4292 } 4293 if (pcbddc->local_mat->symmetric_set) { 4294 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4295 } 4296 if (!pcbddc->ksp_R) { /* create object if not present */ 4297 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4298 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4299 /* default */ 4300 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4301 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4302 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4303 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4304 if (issbaij) { 4305 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4306 } else { 4307 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4308 } 4309 /* Allow user's customization */ 4310 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4311 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4312 } 4313 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4314 if (!n_R) { 4315 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4316 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4317 } 4318 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4319 /* Reuse solver if it is present */ 4320 if (sub_schurs && sub_schurs->reuse_solver) { 4321 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4322 4323 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4324 } 4325 /* Set Up KSP for Neumann problem of BDDC */ 4326 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4327 } 4328 4329 if (pcbddc->dbg_flag) { 4330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4331 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4332 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4333 } 4334 4335 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4336 check_corr[0] = check_corr[1] = PETSC_FALSE; 4337 if (pcbddc->NullSpace_corr[0]) { 4338 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4339 } 4340 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4341 check_corr[0] = PETSC_TRUE; 4342 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4343 } 4344 if (neumann && pcbddc->NullSpace_corr[2]) { 4345 check_corr[1] = PETSC_TRUE; 4346 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4347 } 4348 4349 /* check Dirichlet and Neumann solvers */ 4350 if (pcbddc->dbg_flag) { 4351 if (dirichlet) { /* Dirichlet */ 4352 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4353 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4354 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4355 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4356 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4357 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4358 if (check_corr[0]) { 4359 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4360 } 4361 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4362 } 4363 if (neumann) { /* Neumann */ 4364 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4365 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4366 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4367 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4368 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4369 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4370 if (check_corr[1]) { 4371 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4372 } 4373 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4374 } 4375 } 4376 /* free Neumann problem's matrix */ 4377 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4378 PetscFunctionReturn(0); 4379 } 4380 4381 #undef __FUNCT__ 4382 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4383 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4384 { 4385 PetscErrorCode ierr; 4386 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4387 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4388 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4389 4390 PetscFunctionBegin; 4391 if (!reuse_solver) { 4392 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4393 } 4394 if (!pcbddc->switch_static) { 4395 if (applytranspose && pcbddc->local_auxmat1) { 4396 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4397 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4398 } 4399 if (!reuse_solver) { 4400 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4401 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4402 } else { 4403 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4404 4405 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4406 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4407 } 4408 } else { 4409 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4410 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4411 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4412 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4413 if (applytranspose && pcbddc->local_auxmat1) { 4414 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4415 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4416 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4417 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4418 } 4419 } 4420 if (!reuse_solver || pcbddc->switch_static) { 4421 if (applytranspose) { 4422 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4423 } else { 4424 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4425 } 4426 } else { 4427 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4428 4429 if (applytranspose) { 4430 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4431 } else { 4432 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4433 } 4434 } 4435 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4436 if (!pcbddc->switch_static) { 4437 if (!reuse_solver) { 4438 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4439 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4440 } else { 4441 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4442 4443 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4444 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4445 } 4446 if (!applytranspose && pcbddc->local_auxmat1) { 4447 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4448 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4449 } 4450 } else { 4451 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4452 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4453 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4454 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4455 if (!applytranspose && pcbddc->local_auxmat1) { 4456 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4457 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4458 } 4459 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4460 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4461 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4462 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4463 } 4464 PetscFunctionReturn(0); 4465 } 4466 4467 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4468 #undef __FUNCT__ 4469 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4470 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4471 { 4472 PetscErrorCode ierr; 4473 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4474 PC_IS* pcis = (PC_IS*) (pc->data); 4475 const PetscScalar zero = 0.0; 4476 4477 PetscFunctionBegin; 4478 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4479 if (!pcbddc->benign_apply_coarse_only) { 4480 if (applytranspose) { 4481 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4482 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4483 } else { 4484 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4485 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4486 } 4487 } else { 4488 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4489 } 4490 4491 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4492 if (pcbddc->benign_n) { 4493 PetscScalar *array; 4494 PetscInt j; 4495 4496 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4497 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4498 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4499 } 4500 4501 /* start communications from local primal nodes to rhs of coarse solver */ 4502 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4503 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4504 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4505 4506 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4507 if (pcbddc->coarse_ksp) { 4508 Mat coarse_mat; 4509 Vec rhs,sol; 4510 MatNullSpace nullsp; 4511 PetscBool isbddc = PETSC_FALSE; 4512 4513 if (pcbddc->benign_have_null) { 4514 PC coarse_pc; 4515 4516 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4517 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4518 /* we need to propagate to coarser levels the need for a possible benign correction */ 4519 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4520 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4521 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4522 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4523 } 4524 } 4525 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4526 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4527 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4528 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4529 if (nullsp) { 4530 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4531 } 4532 if (applytranspose) { 4533 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4534 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4535 } else { 4536 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4537 PC coarse_pc; 4538 4539 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4540 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4541 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4542 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4543 } else { 4544 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4545 } 4546 } 4547 /* we don't need the benign correction at coarser levels anymore */ 4548 if (pcbddc->benign_have_null && isbddc) { 4549 PC coarse_pc; 4550 PC_BDDC* coarsepcbddc; 4551 4552 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4553 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4554 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4555 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4556 } 4557 if (nullsp) { 4558 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4559 } 4560 } 4561 4562 /* Local solution on R nodes */ 4563 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4564 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4565 } 4566 /* communications from coarse sol to local primal nodes */ 4567 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4568 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4569 4570 /* Sum contributions from the two levels */ 4571 if (!pcbddc->benign_apply_coarse_only) { 4572 if (applytranspose) { 4573 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4574 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4575 } else { 4576 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4577 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4578 } 4579 /* store p0 */ 4580 if (pcbddc->benign_n) { 4581 PetscScalar *array; 4582 PetscInt j; 4583 4584 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4585 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4586 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4587 } 4588 } else { /* expand the coarse solution */ 4589 if (applytranspose) { 4590 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4591 } else { 4592 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4593 } 4594 } 4595 PetscFunctionReturn(0); 4596 } 4597 4598 #undef __FUNCT__ 4599 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4600 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4601 { 4602 PetscErrorCode ierr; 4603 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4604 PetscScalar *array; 4605 Vec from,to; 4606 4607 PetscFunctionBegin; 4608 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4609 from = pcbddc->coarse_vec; 4610 to = pcbddc->vec1_P; 4611 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4612 Vec tvec; 4613 4614 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4615 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4616 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4617 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4618 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4619 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4620 } 4621 } else { /* from local to global -> put data in coarse right hand side */ 4622 from = pcbddc->vec1_P; 4623 to = pcbddc->coarse_vec; 4624 } 4625 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4626 PetscFunctionReturn(0); 4627 } 4628 4629 #undef __FUNCT__ 4630 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4631 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4632 { 4633 PetscErrorCode ierr; 4634 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4635 PetscScalar *array; 4636 Vec from,to; 4637 4638 PetscFunctionBegin; 4639 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4640 from = pcbddc->coarse_vec; 4641 to = pcbddc->vec1_P; 4642 } else { /* from local to global -> put data in coarse right hand side */ 4643 from = pcbddc->vec1_P; 4644 to = pcbddc->coarse_vec; 4645 } 4646 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4647 if (smode == SCATTER_FORWARD) { 4648 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4649 Vec tvec; 4650 4651 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4652 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4653 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4654 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4655 } 4656 } else { 4657 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4658 ierr = VecResetArray(from);CHKERRQ(ierr); 4659 } 4660 } 4661 PetscFunctionReturn(0); 4662 } 4663 4664 /* uncomment for testing purposes */ 4665 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4666 #undef __FUNCT__ 4667 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4668 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4669 { 4670 PetscErrorCode ierr; 4671 PC_IS* pcis = (PC_IS*)(pc->data); 4672 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4673 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4674 /* one and zero */ 4675 PetscScalar one=1.0,zero=0.0; 4676 /* space to store constraints and their local indices */ 4677 PetscScalar *constraints_data; 4678 PetscInt *constraints_idxs,*constraints_idxs_B; 4679 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4680 PetscInt *constraints_n; 4681 /* iterators */ 4682 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4683 /* BLAS integers */ 4684 PetscBLASInt lwork,lierr; 4685 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4686 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4687 /* reuse */ 4688 PetscInt olocal_primal_size,olocal_primal_size_cc; 4689 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4690 /* change of basis */ 4691 PetscBool qr_needed; 4692 PetscBT change_basis,qr_needed_idx; 4693 /* auxiliary stuff */ 4694 PetscInt *nnz,*is_indices; 4695 PetscInt ncc; 4696 /* some quantities */ 4697 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4698 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4699 4700 PetscFunctionBegin; 4701 /* Destroy Mat objects computed previously */ 4702 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4703 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4704 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4705 /* save info on constraints from previous setup (if any) */ 4706 olocal_primal_size = pcbddc->local_primal_size; 4707 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4708 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4709 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4710 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4711 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4712 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4713 4714 if (!pcbddc->adaptive_selection) { 4715 IS ISForVertices,*ISForFaces,*ISForEdges; 4716 MatNullSpace nearnullsp; 4717 const Vec *nearnullvecs; 4718 Vec *localnearnullsp; 4719 PetscScalar *array; 4720 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4721 PetscBool nnsp_has_cnst; 4722 /* LAPACK working arrays for SVD or POD */ 4723 PetscBool skip_lapack,boolforchange; 4724 PetscScalar *work; 4725 PetscReal *singular_vals; 4726 #if defined(PETSC_USE_COMPLEX) 4727 PetscReal *rwork; 4728 #endif 4729 #if defined(PETSC_MISSING_LAPACK_GESVD) 4730 PetscScalar *temp_basis,*correlation_mat; 4731 #else 4732 PetscBLASInt dummy_int=1; 4733 PetscScalar dummy_scalar=1.; 4734 #endif 4735 4736 /* Get index sets for faces, edges and vertices from graph */ 4737 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4738 /* print some info */ 4739 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4740 PetscInt nv; 4741 4742 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4743 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4744 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4745 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4746 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4747 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4748 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4749 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4750 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4751 } 4752 4753 /* free unneeded index sets */ 4754 if (!pcbddc->use_vertices) { 4755 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4756 } 4757 if (!pcbddc->use_edges) { 4758 for (i=0;i<n_ISForEdges;i++) { 4759 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4760 } 4761 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4762 n_ISForEdges = 0; 4763 } 4764 if (!pcbddc->use_faces) { 4765 for (i=0;i<n_ISForFaces;i++) { 4766 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4767 } 4768 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4769 n_ISForFaces = 0; 4770 } 4771 4772 /* check if near null space is attached to global mat */ 4773 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4774 if (nearnullsp) { 4775 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4776 /* remove any stored info */ 4777 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4778 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4779 /* store information for BDDC solver reuse */ 4780 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4781 pcbddc->onearnullspace = nearnullsp; 4782 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4783 for (i=0;i<nnsp_size;i++) { 4784 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4785 } 4786 } else { /* if near null space is not provided BDDC uses constants by default */ 4787 nnsp_size = 0; 4788 nnsp_has_cnst = PETSC_TRUE; 4789 } 4790 /* get max number of constraints on a single cc */ 4791 max_constraints = nnsp_size; 4792 if (nnsp_has_cnst) max_constraints++; 4793 4794 /* 4795 Evaluate maximum storage size needed by the procedure 4796 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4797 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4798 There can be multiple constraints per connected component 4799 */ 4800 n_vertices = 0; 4801 if (ISForVertices) { 4802 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4803 } 4804 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4805 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4806 4807 total_counts = n_ISForFaces+n_ISForEdges; 4808 total_counts *= max_constraints; 4809 total_counts += n_vertices; 4810 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4811 4812 total_counts = 0; 4813 max_size_of_constraint = 0; 4814 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4815 IS used_is; 4816 if (i<n_ISForEdges) { 4817 used_is = ISForEdges[i]; 4818 } else { 4819 used_is = ISForFaces[i-n_ISForEdges]; 4820 } 4821 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4822 total_counts += j; 4823 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4824 } 4825 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4826 4827 /* get local part of global near null space vectors */ 4828 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4829 for (k=0;k<nnsp_size;k++) { 4830 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4831 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4832 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4833 } 4834 4835 /* whether or not to skip lapack calls */ 4836 skip_lapack = PETSC_TRUE; 4837 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4838 4839 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4840 if (!skip_lapack) { 4841 PetscScalar temp_work; 4842 4843 #if defined(PETSC_MISSING_LAPACK_GESVD) 4844 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4845 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4846 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4847 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4848 #if defined(PETSC_USE_COMPLEX) 4849 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4850 #endif 4851 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4852 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4853 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4854 lwork = -1; 4855 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4856 #if !defined(PETSC_USE_COMPLEX) 4857 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4858 #else 4859 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4860 #endif 4861 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4862 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4863 #else /* on missing GESVD */ 4864 /* SVD */ 4865 PetscInt max_n,min_n; 4866 max_n = max_size_of_constraint; 4867 min_n = max_constraints; 4868 if (max_size_of_constraint < max_constraints) { 4869 min_n = max_size_of_constraint; 4870 max_n = max_constraints; 4871 } 4872 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4873 #if defined(PETSC_USE_COMPLEX) 4874 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4875 #endif 4876 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4877 lwork = -1; 4878 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4879 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4880 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4881 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4882 #if !defined(PETSC_USE_COMPLEX) 4883 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4884 #else 4885 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4886 #endif 4887 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4888 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4889 #endif /* on missing GESVD */ 4890 /* Allocate optimal workspace */ 4891 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4892 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4893 } 4894 /* Now we can loop on constraining sets */ 4895 total_counts = 0; 4896 constraints_idxs_ptr[0] = 0; 4897 constraints_data_ptr[0] = 0; 4898 /* vertices */ 4899 if (n_vertices) { 4900 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4901 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4902 for (i=0;i<n_vertices;i++) { 4903 constraints_n[total_counts] = 1; 4904 constraints_data[total_counts] = 1.0; 4905 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4906 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4907 total_counts++; 4908 } 4909 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4910 n_vertices = total_counts; 4911 } 4912 4913 /* edges and faces */ 4914 total_counts_cc = total_counts; 4915 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4916 IS used_is; 4917 PetscBool idxs_copied = PETSC_FALSE; 4918 4919 if (ncc<n_ISForEdges) { 4920 used_is = ISForEdges[ncc]; 4921 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4922 } else { 4923 used_is = ISForFaces[ncc-n_ISForEdges]; 4924 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4925 } 4926 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4927 4928 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4929 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4930 /* change of basis should not be performed on local periodic nodes */ 4931 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4932 if (nnsp_has_cnst) { 4933 PetscScalar quad_value; 4934 4935 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4936 idxs_copied = PETSC_TRUE; 4937 4938 if (!pcbddc->use_nnsp_true) { 4939 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4940 } else { 4941 quad_value = 1.0; 4942 } 4943 for (j=0;j<size_of_constraint;j++) { 4944 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4945 } 4946 temp_constraints++; 4947 total_counts++; 4948 } 4949 for (k=0;k<nnsp_size;k++) { 4950 PetscReal real_value; 4951 PetscScalar *ptr_to_data; 4952 4953 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4954 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4955 for (j=0;j<size_of_constraint;j++) { 4956 ptr_to_data[j] = array[is_indices[j]]; 4957 } 4958 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4959 /* check if array is null on the connected component */ 4960 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4961 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 4962 if (real_value > 0.0) { /* keep indices and values */ 4963 temp_constraints++; 4964 total_counts++; 4965 if (!idxs_copied) { 4966 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4967 idxs_copied = PETSC_TRUE; 4968 } 4969 } 4970 } 4971 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4972 valid_constraints = temp_constraints; 4973 if (!pcbddc->use_nnsp_true && temp_constraints) { 4974 if (temp_constraints == 1) { /* just normalize the constraint */ 4975 PetscScalar norm,*ptr_to_data; 4976 4977 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4978 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4979 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 4980 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 4981 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 4982 } else { /* perform SVD */ 4983 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 4984 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4985 4986 #if defined(PETSC_MISSING_LAPACK_GESVD) 4987 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 4988 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 4989 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 4990 the constraints basis will differ (by a complex factor with absolute value equal to 1) 4991 from that computed using LAPACKgesvd 4992 -> This is due to a different computation of eigenvectors in LAPACKheev 4993 -> The quality of the POD-computed basis will be the same */ 4994 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 4995 /* Store upper triangular part of correlation matrix */ 4996 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4997 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4998 for (j=0;j<temp_constraints;j++) { 4999 for (k=0;k<j+1;k++) { 5000 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5001 } 5002 } 5003 /* compute eigenvalues and eigenvectors of correlation matrix */ 5004 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5005 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5006 #if !defined(PETSC_USE_COMPLEX) 5007 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5008 #else 5009 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5010 #endif 5011 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5012 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5013 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5014 j = 0; 5015 while (j < temp_constraints && singular_vals[j] < tol) j++; 5016 total_counts = total_counts-j; 5017 valid_constraints = temp_constraints-j; 5018 /* scale and copy POD basis into used quadrature memory */ 5019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5020 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5021 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5022 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5023 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5024 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5025 if (j<temp_constraints) { 5026 PetscInt ii; 5027 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5028 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5029 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5030 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5031 for (k=0;k<temp_constraints-j;k++) { 5032 for (ii=0;ii<size_of_constraint;ii++) { 5033 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5034 } 5035 } 5036 } 5037 #else /* on missing GESVD */ 5038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5039 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5040 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5041 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5042 #if !defined(PETSC_USE_COMPLEX) 5043 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5044 #else 5045 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5046 #endif 5047 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5048 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5049 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5050 k = temp_constraints; 5051 if (k > size_of_constraint) k = size_of_constraint; 5052 j = 0; 5053 while (j < k && singular_vals[k-j-1] < tol) j++; 5054 valid_constraints = k-j; 5055 total_counts = total_counts-temp_constraints+valid_constraints; 5056 #endif /* on missing GESVD */ 5057 } 5058 } 5059 /* update pointers information */ 5060 if (valid_constraints) { 5061 constraints_n[total_counts_cc] = valid_constraints; 5062 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5063 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5064 /* set change_of_basis flag */ 5065 if (boolforchange) { 5066 PetscBTSet(change_basis,total_counts_cc); 5067 } 5068 total_counts_cc++; 5069 } 5070 } 5071 /* free workspace */ 5072 if (!skip_lapack) { 5073 ierr = PetscFree(work);CHKERRQ(ierr); 5074 #if defined(PETSC_USE_COMPLEX) 5075 ierr = PetscFree(rwork);CHKERRQ(ierr); 5076 #endif 5077 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5078 #if defined(PETSC_MISSING_LAPACK_GESVD) 5079 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5080 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5081 #endif 5082 } 5083 for (k=0;k<nnsp_size;k++) { 5084 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5085 } 5086 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5087 /* free index sets of faces, edges and vertices */ 5088 for (i=0;i<n_ISForFaces;i++) { 5089 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5090 } 5091 if (n_ISForFaces) { 5092 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5093 } 5094 for (i=0;i<n_ISForEdges;i++) { 5095 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5096 } 5097 if (n_ISForEdges) { 5098 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5099 } 5100 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5101 } else { 5102 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5103 5104 total_counts = 0; 5105 n_vertices = 0; 5106 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5107 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5108 } 5109 max_constraints = 0; 5110 total_counts_cc = 0; 5111 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5112 total_counts += pcbddc->adaptive_constraints_n[i]; 5113 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5114 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5115 } 5116 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5117 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5118 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5119 constraints_data = pcbddc->adaptive_constraints_data; 5120 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5121 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5122 total_counts_cc = 0; 5123 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5124 if (pcbddc->adaptive_constraints_n[i]) { 5125 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5126 } 5127 } 5128 #if 0 5129 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5130 for (i=0;i<total_counts_cc;i++) { 5131 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5132 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5133 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5134 printf(" %d",constraints_idxs[j]); 5135 } 5136 printf("\n"); 5137 printf("number of cc: %d\n",constraints_n[i]); 5138 } 5139 for (i=0;i<n_vertices;i++) { 5140 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5141 } 5142 for (i=0;i<sub_schurs->n_subs;i++) { 5143 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5144 } 5145 #endif 5146 5147 max_size_of_constraint = 0; 5148 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5149 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5150 /* Change of basis */ 5151 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5152 if (pcbddc->use_change_of_basis) { 5153 for (i=0;i<sub_schurs->n_subs;i++) { 5154 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5155 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5156 } 5157 } 5158 } 5159 } 5160 pcbddc->local_primal_size = total_counts; 5161 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5162 5163 /* map constraints_idxs in boundary numbering */ 5164 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5165 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5166 5167 /* Create constraint matrix */ 5168 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5169 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5170 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5171 5172 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5173 /* determine if a QR strategy is needed for change of basis */ 5174 qr_needed = PETSC_FALSE; 5175 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5176 total_primal_vertices=0; 5177 pcbddc->local_primal_size_cc = 0; 5178 for (i=0;i<total_counts_cc;i++) { 5179 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5180 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5181 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5182 pcbddc->local_primal_size_cc += 1; 5183 } else if (PetscBTLookup(change_basis,i)) { 5184 for (k=0;k<constraints_n[i];k++) { 5185 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5186 } 5187 pcbddc->local_primal_size_cc += constraints_n[i]; 5188 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5189 PetscBTSet(qr_needed_idx,i); 5190 qr_needed = PETSC_TRUE; 5191 } 5192 } else { 5193 pcbddc->local_primal_size_cc += 1; 5194 } 5195 } 5196 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5197 pcbddc->n_vertices = total_primal_vertices; 5198 /* permute indices in order to have a sorted set of vertices */ 5199 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5200 5201 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5202 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5203 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5204 5205 /* nonzero structure of constraint matrix */ 5206 /* and get reference dof for local constraints */ 5207 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5208 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5209 5210 j = total_primal_vertices; 5211 total_counts = total_primal_vertices; 5212 cum = total_primal_vertices; 5213 for (i=n_vertices;i<total_counts_cc;i++) { 5214 if (!PetscBTLookup(change_basis,i)) { 5215 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5216 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5217 cum++; 5218 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5219 for (k=0;k<constraints_n[i];k++) { 5220 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5221 nnz[j+k] = size_of_constraint; 5222 } 5223 j += constraints_n[i]; 5224 } 5225 } 5226 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5227 ierr = PetscFree(nnz);CHKERRQ(ierr); 5228 5229 /* set values in constraint matrix */ 5230 for (i=0;i<total_primal_vertices;i++) { 5231 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5232 } 5233 total_counts = total_primal_vertices; 5234 for (i=n_vertices;i<total_counts_cc;i++) { 5235 if (!PetscBTLookup(change_basis,i)) { 5236 PetscInt *cols; 5237 5238 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5239 cols = constraints_idxs+constraints_idxs_ptr[i]; 5240 for (k=0;k<constraints_n[i];k++) { 5241 PetscInt row = total_counts+k; 5242 PetscScalar *vals; 5243 5244 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5245 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5246 } 5247 total_counts += constraints_n[i]; 5248 } 5249 } 5250 /* assembling */ 5251 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5252 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5253 5254 /* 5255 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5256 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5257 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5258 */ 5259 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5260 if (pcbddc->use_change_of_basis) { 5261 /* dual and primal dofs on a single cc */ 5262 PetscInt dual_dofs,primal_dofs; 5263 /* working stuff for GEQRF */ 5264 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5265 PetscBLASInt lqr_work; 5266 /* working stuff for UNGQR */ 5267 PetscScalar *gqr_work,lgqr_work_t; 5268 PetscBLASInt lgqr_work; 5269 /* working stuff for TRTRS */ 5270 PetscScalar *trs_rhs; 5271 PetscBLASInt Blas_NRHS; 5272 /* pointers for values insertion into change of basis matrix */ 5273 PetscInt *start_rows,*start_cols; 5274 PetscScalar *start_vals; 5275 /* working stuff for values insertion */ 5276 PetscBT is_primal; 5277 PetscInt *aux_primal_numbering_B; 5278 /* matrix sizes */ 5279 PetscInt global_size,local_size; 5280 /* temporary change of basis */ 5281 Mat localChangeOfBasisMatrix; 5282 /* extra space for debugging */ 5283 PetscScalar *dbg_work; 5284 5285 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5286 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5287 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5288 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5289 /* nonzeros for local mat */ 5290 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5291 if (!pcbddc->benign_change || pcbddc->fake_change) { 5292 for (i=0;i<pcis->n;i++) nnz[i]=1; 5293 } else { 5294 const PetscInt *ii; 5295 PetscInt n; 5296 PetscBool flg_row; 5297 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5298 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5299 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5300 } 5301 for (i=n_vertices;i<total_counts_cc;i++) { 5302 if (PetscBTLookup(change_basis,i)) { 5303 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5304 if (PetscBTLookup(qr_needed_idx,i)) { 5305 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5306 } else { 5307 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5308 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5309 } 5310 } 5311 } 5312 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5313 ierr = PetscFree(nnz);CHKERRQ(ierr); 5314 /* Set interior change in the matrix */ 5315 if (!pcbddc->benign_change || pcbddc->fake_change) { 5316 for (i=0;i<pcis->n;i++) { 5317 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5318 } 5319 } else { 5320 const PetscInt *ii,*jj; 5321 PetscScalar *aa; 5322 PetscInt n; 5323 PetscBool flg_row; 5324 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5325 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5326 for (i=0;i<n;i++) { 5327 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5328 } 5329 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5330 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5331 } 5332 5333 if (pcbddc->dbg_flag) { 5334 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5335 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5336 } 5337 5338 5339 /* Now we loop on the constraints which need a change of basis */ 5340 /* 5341 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5342 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5343 5344 Basic blocks of change of basis matrix T computed by 5345 5346 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5347 5348 | 1 0 ... 0 s_1/S | 5349 | 0 1 ... 0 s_2/S | 5350 | ... | 5351 | 0 ... 1 s_{n-1}/S | 5352 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5353 5354 with S = \sum_{i=1}^n s_i^2 5355 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5356 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5357 5358 - QR decomposition of constraints otherwise 5359 */ 5360 if (qr_needed) { 5361 /* space to store Q */ 5362 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5363 /* array to store scaling factors for reflectors */ 5364 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5365 /* first we issue queries for optimal work */ 5366 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5367 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5368 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5369 lqr_work = -1; 5370 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5371 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5372 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5373 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5374 lgqr_work = -1; 5375 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5376 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5377 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5378 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5379 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5380 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5381 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5382 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5383 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5384 /* array to store rhs and solution of triangular solver */ 5385 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5386 /* allocating workspace for check */ 5387 if (pcbddc->dbg_flag) { 5388 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5389 } 5390 } 5391 /* array to store whether a node is primal or not */ 5392 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5393 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5394 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5395 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5396 for (i=0;i<total_primal_vertices;i++) { 5397 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5398 } 5399 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5400 5401 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5402 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5403 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5404 if (PetscBTLookup(change_basis,total_counts)) { 5405 /* get constraint info */ 5406 primal_dofs = constraints_n[total_counts]; 5407 dual_dofs = size_of_constraint-primal_dofs; 5408 5409 if (pcbddc->dbg_flag) { 5410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5411 } 5412 5413 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5414 5415 /* copy quadrature constraints for change of basis check */ 5416 if (pcbddc->dbg_flag) { 5417 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5418 } 5419 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5420 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5421 5422 /* compute QR decomposition of constraints */ 5423 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5424 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5425 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5426 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5427 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5428 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5429 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5430 5431 /* explictly compute R^-T */ 5432 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5433 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5434 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5435 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5436 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5437 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5438 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5439 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5440 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5441 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5442 5443 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5444 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5445 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5446 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5447 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5448 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5449 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5450 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5451 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5452 5453 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5454 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5455 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5456 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5457 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5458 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5459 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5460 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5461 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5462 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5463 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5464 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5465 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5466 5467 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5468 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5469 /* insert cols for primal dofs */ 5470 for (j=0;j<primal_dofs;j++) { 5471 start_vals = &qr_basis[j*size_of_constraint]; 5472 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5473 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5474 } 5475 /* insert cols for dual dofs */ 5476 for (j=0,k=0;j<dual_dofs;k++) { 5477 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5478 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5479 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5480 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5481 j++; 5482 } 5483 } 5484 5485 /* check change of basis */ 5486 if (pcbddc->dbg_flag) { 5487 PetscInt ii,jj; 5488 PetscBool valid_qr=PETSC_TRUE; 5489 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5490 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5491 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5492 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5493 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5494 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5495 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5496 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5497 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5498 for (jj=0;jj<size_of_constraint;jj++) { 5499 for (ii=0;ii<primal_dofs;ii++) { 5500 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5501 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5502 } 5503 } 5504 if (!valid_qr) { 5505 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5506 for (jj=0;jj<size_of_constraint;jj++) { 5507 for (ii=0;ii<primal_dofs;ii++) { 5508 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5509 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5510 } 5511 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5512 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5513 } 5514 } 5515 } 5516 } else { 5517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5518 } 5519 } 5520 } else { /* simple transformation block */ 5521 PetscInt row,col; 5522 PetscScalar val,norm; 5523 5524 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5525 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5526 for (j=0;j<size_of_constraint;j++) { 5527 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5528 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5529 if (!PetscBTLookup(is_primal,row_B)) { 5530 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5531 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5532 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5533 } else { 5534 for (k=0;k<size_of_constraint;k++) { 5535 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5536 if (row != col) { 5537 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5538 } else { 5539 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5540 } 5541 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5542 } 5543 } 5544 } 5545 if (pcbddc->dbg_flag) { 5546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5547 } 5548 } 5549 } else { 5550 if (pcbddc->dbg_flag) { 5551 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5552 } 5553 } 5554 } 5555 5556 /* free workspace */ 5557 if (qr_needed) { 5558 if (pcbddc->dbg_flag) { 5559 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5560 } 5561 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5562 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5563 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5564 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5565 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5566 } 5567 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5568 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5569 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5570 5571 /* assembling of global change of variable */ 5572 if (!pcbddc->fake_change) { 5573 Mat tmat; 5574 PetscInt bs; 5575 5576 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5577 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5578 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5579 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5580 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5581 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5582 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5583 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5584 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5585 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5586 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5587 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5588 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5589 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5590 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5591 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5592 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5593 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5594 5595 /* check */ 5596 if (pcbddc->dbg_flag) { 5597 PetscReal error; 5598 Vec x,x_change; 5599 5600 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5601 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5602 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5603 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5604 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5605 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5606 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5607 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5608 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5609 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5610 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5611 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5612 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5613 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5614 ierr = VecDestroy(&x);CHKERRQ(ierr); 5615 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5616 } 5617 /* adapt sub_schurs computed (if any) */ 5618 if (pcbddc->use_deluxe_scaling) { 5619 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5620 5621 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5622 if (sub_schurs && sub_schurs->S_Ej_all) { 5623 Mat S_new,tmat; 5624 IS is_all_N,is_V_Sall = NULL; 5625 5626 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5627 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5628 if (pcbddc->deluxe_zerorows) { 5629 ISLocalToGlobalMapping NtoSall; 5630 IS is_V; 5631 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5632 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5633 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5634 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5635 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5636 } 5637 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5638 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5639 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5640 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5641 if (pcbddc->deluxe_zerorows) { 5642 const PetscScalar *array; 5643 const PetscInt *idxs_V,*idxs_all; 5644 PetscInt i,n_V; 5645 5646 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5647 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5648 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5649 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5650 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5651 for (i=0;i<n_V;i++) { 5652 PetscScalar val; 5653 PetscInt idx; 5654 5655 idx = idxs_V[i]; 5656 val = array[idxs_all[idxs_V[i]]]; 5657 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5658 } 5659 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5660 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5661 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5662 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5663 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5664 } 5665 sub_schurs->S_Ej_all = S_new; 5666 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5667 if (sub_schurs->sum_S_Ej_all) { 5668 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5669 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5670 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5671 if (pcbddc->deluxe_zerorows) { 5672 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5673 } 5674 sub_schurs->sum_S_Ej_all = S_new; 5675 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5676 } 5677 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5678 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5679 } 5680 /* destroy any change of basis context in sub_schurs */ 5681 if (sub_schurs && sub_schurs->change) { 5682 PetscInt i; 5683 5684 for (i=0;i<sub_schurs->n_subs;i++) { 5685 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5686 } 5687 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5688 } 5689 } 5690 if (pcbddc->switch_static) { /* need to save the local change */ 5691 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5692 } else { 5693 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5694 } 5695 /* determine if any process has changed the pressures locally */ 5696 pcbddc->change_interior = pcbddc->benign_have_null; 5697 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5698 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5699 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5700 pcbddc->use_qr_single = qr_needed; 5701 } 5702 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5703 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5704 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5705 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5706 } else { 5707 Mat benign_global = NULL; 5708 if (pcbddc->benign_have_null) { 5709 Mat tmat; 5710 5711 pcbddc->change_interior = PETSC_TRUE; 5712 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5713 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5714 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5715 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5716 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5717 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5718 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5719 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5720 if (pcbddc->benign_change) { 5721 Mat M; 5722 5723 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5724 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5725 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5726 ierr = MatDestroy(&M);CHKERRQ(ierr); 5727 } else { 5728 Mat eye; 5729 PetscScalar *array; 5730 5731 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5732 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5733 for (i=0;i<pcis->n;i++) { 5734 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5735 } 5736 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5737 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5738 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5739 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5740 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5741 } 5742 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5743 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5744 } 5745 if (pcbddc->user_ChangeOfBasisMatrix) { 5746 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5747 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5748 } else if (pcbddc->benign_have_null) { 5749 pcbddc->ChangeOfBasisMatrix = benign_global; 5750 } 5751 } 5752 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5753 IS is_global; 5754 const PetscInt *gidxs; 5755 5756 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5757 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5758 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5759 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5760 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5761 } 5762 } 5763 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5764 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5765 } 5766 5767 if (!pcbddc->fake_change) { 5768 /* add pressure dofs to set of primal nodes for numbering purposes */ 5769 for (i=0;i<pcbddc->benign_n;i++) { 5770 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5771 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5772 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5773 pcbddc->local_primal_size_cc++; 5774 pcbddc->local_primal_size++; 5775 } 5776 5777 /* check if a new primal space has been introduced (also take into account benign trick) */ 5778 pcbddc->new_primal_space_local = PETSC_TRUE; 5779 if (olocal_primal_size == pcbddc->local_primal_size) { 5780 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5781 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5782 if (!pcbddc->new_primal_space_local) { 5783 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5784 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5785 } 5786 } 5787 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5788 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5789 } 5790 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5791 5792 /* flush dbg viewer */ 5793 if (pcbddc->dbg_flag) { 5794 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5795 } 5796 5797 /* free workspace */ 5798 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5799 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5800 if (!pcbddc->adaptive_selection) { 5801 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5802 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5803 } else { 5804 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5805 pcbddc->adaptive_constraints_idxs_ptr, 5806 pcbddc->adaptive_constraints_data_ptr, 5807 pcbddc->adaptive_constraints_idxs, 5808 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5809 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5810 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5811 } 5812 PetscFunctionReturn(0); 5813 } 5814 5815 #undef __FUNCT__ 5816 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5817 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5818 { 5819 ISLocalToGlobalMapping map; 5820 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5821 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5822 PetscInt ierr,i,N; 5823 5824 PetscFunctionBegin; 5825 if (pcbddc->graphanalyzed && !pcbddc->recompute_topography) PetscFunctionReturn(0); 5826 /* Reset previously computed graph */ 5827 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5828 /* Init local Graph struct */ 5829 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5830 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5831 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5832 5833 /* Check validity of the csr graph passed in by the user */ 5834 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5835 5836 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5837 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5838 PetscInt *xadj,*adjncy; 5839 PetscInt nvtxs; 5840 PetscBool flg_row=PETSC_FALSE; 5841 5842 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5843 if (flg_row) { 5844 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5845 pcbddc->computed_rowadj = PETSC_TRUE; 5846 } 5847 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5848 } 5849 if (pcbddc->dbg_flag) { 5850 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5851 } 5852 5853 /* Setup of Graph */ 5854 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5855 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5856 5857 /* attach info on disconnected subdomains if present */ 5858 if (pcbddc->n_local_subs) { 5859 PetscInt *local_subs; 5860 5861 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5862 for (i=0;i<pcbddc->n_local_subs;i++) { 5863 const PetscInt *idxs; 5864 PetscInt nl,j; 5865 5866 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5867 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5868 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5869 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5870 } 5871 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5872 pcbddc->mat_graph->local_subs = local_subs; 5873 } 5874 5875 /* Graph's connected components analysis */ 5876 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5877 5878 /* set flag indicating analysis has been done */ 5879 pcbddc->graphanalyzed = PETSC_TRUE; 5880 PetscFunctionReturn(0); 5881 } 5882 5883 #undef __FUNCT__ 5884 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5885 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5886 { 5887 PetscInt i,j; 5888 PetscScalar *alphas; 5889 PetscErrorCode ierr; 5890 5891 PetscFunctionBegin; 5892 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5893 for (i=0;i<n;i++) { 5894 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5895 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5896 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5897 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5898 } 5899 ierr = PetscFree(alphas);CHKERRQ(ierr); 5900 PetscFunctionReturn(0); 5901 } 5902 5903 #undef __FUNCT__ 5904 #define __FUNCT__ "MatISGetSubassemblingPattern" 5905 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5906 { 5907 Mat A; 5908 PetscInt n_neighs,*neighs,*n_shared,**shared; 5909 PetscMPIInt size,rank,color; 5910 PetscInt *xadj,*adjncy; 5911 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5912 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5913 PetscInt void_procs,*procs_candidates = NULL; 5914 PetscInt xadj_count, *count; 5915 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5916 PetscSubcomm psubcomm; 5917 MPI_Comm subcomm; 5918 PetscErrorCode ierr; 5919 5920 PetscFunctionBegin; 5921 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5922 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5923 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5924 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5925 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5926 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5927 5928 if (have_void) *have_void = PETSC_FALSE; 5929 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5930 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5931 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5932 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5933 im_active = !!(n); 5934 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5935 void_procs = size - active_procs; 5936 /* get ranks of of non-active processes in mat communicator */ 5937 if (void_procs) { 5938 PetscInt ncand; 5939 5940 if (have_void) *have_void = PETSC_TRUE; 5941 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5942 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5943 for (i=0,ncand=0;i<size;i++) { 5944 if (!procs_candidates[i]) { 5945 procs_candidates[ncand++] = i; 5946 } 5947 } 5948 /* force n_subdomains to be not greater that the number of non-active processes */ 5949 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5950 } 5951 5952 /* number of subdomains requested greater than active processes -> just shift the matrix 5953 number of subdomains requested 1 -> send to master or first candidate in voids */ 5954 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5955 PetscInt issize,isidx,dest; 5956 if (*n_subdomains == 1) dest = 0; 5957 else dest = rank; 5958 if (im_active) { 5959 issize = 1; 5960 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5961 isidx = procs_candidates[dest]; 5962 } else { 5963 isidx = dest; 5964 } 5965 } else { 5966 issize = 0; 5967 isidx = -1; 5968 } 5969 *n_subdomains = active_procs; 5970 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5971 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5972 PetscFunctionReturn(0); 5973 } 5974 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5975 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5976 threshold = PetscMax(threshold,2); 5977 5978 /* Get info on mapping */ 5979 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5980 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5981 5982 /* build local CSR graph of subdomains' connectivity */ 5983 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5984 xadj[0] = 0; 5985 xadj[1] = PetscMax(n_neighs-1,0); 5986 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5987 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5988 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5989 for (i=1;i<n_neighs;i++) 5990 for (j=0;j<n_shared[i];j++) 5991 count[shared[i][j]] += 1; 5992 5993 xadj_count = 0; 5994 for (i=1;i<n_neighs;i++) { 5995 for (j=0;j<n_shared[i];j++) { 5996 if (count[shared[i][j]] < threshold) { 5997 adjncy[xadj_count] = neighs[i]; 5998 adjncy_wgt[xadj_count] = n_shared[i]; 5999 xadj_count++; 6000 break; 6001 } 6002 } 6003 } 6004 xadj[1] = xadj_count; 6005 ierr = PetscFree(count);CHKERRQ(ierr); 6006 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6007 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6008 6009 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6010 6011 /* Restrict work on active processes only */ 6012 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6013 if (void_procs) { 6014 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6015 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6016 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6017 subcomm = PetscSubcommChild(psubcomm); 6018 } else { 6019 psubcomm = NULL; 6020 subcomm = PetscObjectComm((PetscObject)mat); 6021 } 6022 6023 v_wgt = NULL; 6024 if (!color) { 6025 ierr = PetscFree(xadj);CHKERRQ(ierr); 6026 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6027 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6028 } else { 6029 Mat subdomain_adj; 6030 IS new_ranks,new_ranks_contig; 6031 MatPartitioning partitioner; 6032 PetscInt rstart=0,rend=0; 6033 PetscInt *is_indices,*oldranks; 6034 PetscMPIInt size; 6035 PetscBool aggregate; 6036 6037 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6038 if (void_procs) { 6039 PetscInt prank = rank; 6040 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6041 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6042 for (i=0;i<xadj[1];i++) { 6043 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6044 } 6045 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6046 } else { 6047 oldranks = NULL; 6048 } 6049 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6050 if (aggregate) { /* TODO: all this part could be made more efficient */ 6051 PetscInt lrows,row,ncols,*cols; 6052 PetscMPIInt nrank; 6053 PetscScalar *vals; 6054 6055 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6056 lrows = 0; 6057 if (nrank<redprocs) { 6058 lrows = size/redprocs; 6059 if (nrank<size%redprocs) lrows++; 6060 } 6061 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6062 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6063 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6064 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6065 row = nrank; 6066 ncols = xadj[1]-xadj[0]; 6067 cols = adjncy; 6068 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6069 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6070 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6071 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6072 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6073 ierr = PetscFree(xadj);CHKERRQ(ierr); 6074 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6075 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6076 ierr = PetscFree(vals);CHKERRQ(ierr); 6077 if (use_vwgt) { 6078 Vec v; 6079 const PetscScalar *array; 6080 PetscInt nl; 6081 6082 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6083 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6084 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6085 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6086 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6087 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6088 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6089 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6090 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6091 ierr = VecDestroy(&v);CHKERRQ(ierr); 6092 } 6093 } else { 6094 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6095 if (use_vwgt) { 6096 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6097 v_wgt[0] = local_size; 6098 } 6099 } 6100 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6101 6102 /* Partition */ 6103 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6104 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6105 if (v_wgt) { 6106 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6107 } 6108 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6109 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6110 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6111 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6112 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6113 6114 /* renumber new_ranks to avoid "holes" in new set of processors */ 6115 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6116 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6117 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6118 if (!aggregate) { 6119 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6120 #if defined(PETSC_USE_DEBUG) 6121 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6122 #endif 6123 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6124 } else if (oldranks) { 6125 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6126 } else { 6127 ranks_send_to_idx[0] = is_indices[0]; 6128 } 6129 } else { 6130 PetscInt idxs[1]; 6131 PetscMPIInt tag; 6132 MPI_Request *reqs; 6133 6134 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6135 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6136 for (i=rstart;i<rend;i++) { 6137 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6138 } 6139 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6140 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6141 ierr = PetscFree(reqs);CHKERRQ(ierr); 6142 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6143 #if defined(PETSC_USE_DEBUG) 6144 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6145 #endif 6146 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6147 } else if (oldranks) { 6148 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6149 } else { 6150 ranks_send_to_idx[0] = idxs[0]; 6151 } 6152 } 6153 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6154 /* clean up */ 6155 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6156 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6157 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6158 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6159 } 6160 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6161 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6162 6163 /* assemble parallel IS for sends */ 6164 i = 1; 6165 if (!color) i=0; 6166 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6167 PetscFunctionReturn(0); 6168 } 6169 6170 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6171 6172 #undef __FUNCT__ 6173 #define __FUNCT__ "MatISSubassemble" 6174 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6175 { 6176 Mat local_mat; 6177 IS is_sends_internal; 6178 PetscInt rows,cols,new_local_rows; 6179 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6180 PetscBool ismatis,isdense,newisdense,destroy_mat; 6181 ISLocalToGlobalMapping l2gmap; 6182 PetscInt* l2gmap_indices; 6183 const PetscInt* is_indices; 6184 MatType new_local_type; 6185 /* buffers */ 6186 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6187 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6188 PetscInt *recv_buffer_idxs_local; 6189 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6190 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6191 /* MPI */ 6192 MPI_Comm comm,comm_n; 6193 PetscSubcomm subcomm; 6194 PetscMPIInt n_sends,n_recvs,commsize; 6195 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6196 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6197 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6198 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6199 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6200 PetscErrorCode ierr; 6201 6202 PetscFunctionBegin; 6203 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6204 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6205 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6206 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6207 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6208 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6209 PetscValidLogicalCollectiveBool(mat,reuse,6); 6210 PetscValidLogicalCollectiveInt(mat,nis,8); 6211 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6212 if (nvecs) { 6213 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6214 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6215 } 6216 /* further checks */ 6217 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6218 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6219 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6220 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6221 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6222 if (reuse && *mat_n) { 6223 PetscInt mrows,mcols,mnrows,mncols; 6224 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6225 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6226 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6227 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6228 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6229 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6230 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6231 } 6232 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6233 PetscValidLogicalCollectiveInt(mat,bs,0); 6234 6235 /* prepare IS for sending if not provided */ 6236 if (!is_sends) { 6237 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6238 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6239 } else { 6240 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6241 is_sends_internal = is_sends; 6242 } 6243 6244 /* get comm */ 6245 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6246 6247 /* compute number of sends */ 6248 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6249 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6250 6251 /* compute number of receives */ 6252 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6253 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6254 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6255 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6256 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6257 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6258 ierr = PetscFree(iflags);CHKERRQ(ierr); 6259 6260 /* restrict comm if requested */ 6261 subcomm = 0; 6262 destroy_mat = PETSC_FALSE; 6263 if (restrict_comm) { 6264 PetscMPIInt color,subcommsize; 6265 6266 color = 0; 6267 if (restrict_full) { 6268 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6269 } else { 6270 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6271 } 6272 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6273 subcommsize = commsize - subcommsize; 6274 /* check if reuse has been requested */ 6275 if (reuse) { 6276 if (*mat_n) { 6277 PetscMPIInt subcommsize2; 6278 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6279 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6280 comm_n = PetscObjectComm((PetscObject)*mat_n); 6281 } else { 6282 comm_n = PETSC_COMM_SELF; 6283 } 6284 } else { /* MAT_INITIAL_MATRIX */ 6285 PetscMPIInt rank; 6286 6287 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6288 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6289 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6290 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6291 comm_n = PetscSubcommChild(subcomm); 6292 } 6293 /* flag to destroy *mat_n if not significative */ 6294 if (color) destroy_mat = PETSC_TRUE; 6295 } else { 6296 comm_n = comm; 6297 } 6298 6299 /* prepare send/receive buffers */ 6300 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6301 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6302 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6303 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6304 if (nis) { 6305 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6306 } 6307 6308 /* Get data from local matrices */ 6309 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6310 /* TODO: See below some guidelines on how to prepare the local buffers */ 6311 /* 6312 send_buffer_vals should contain the raw values of the local matrix 6313 send_buffer_idxs should contain: 6314 - MatType_PRIVATE type 6315 - PetscInt size_of_l2gmap 6316 - PetscInt global_row_indices[size_of_l2gmap] 6317 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6318 */ 6319 else { 6320 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6321 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6322 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6323 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6324 send_buffer_idxs[1] = i; 6325 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6326 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6327 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6328 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6329 for (i=0;i<n_sends;i++) { 6330 ilengths_vals[is_indices[i]] = len*len; 6331 ilengths_idxs[is_indices[i]] = len+2; 6332 } 6333 } 6334 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6335 /* additional is (if any) */ 6336 if (nis) { 6337 PetscMPIInt psum; 6338 PetscInt j; 6339 for (j=0,psum=0;j<nis;j++) { 6340 PetscInt plen; 6341 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6342 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6343 psum += len+1; /* indices + lenght */ 6344 } 6345 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6346 for (j=0,psum=0;j<nis;j++) { 6347 PetscInt plen; 6348 const PetscInt *is_array_idxs; 6349 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6350 send_buffer_idxs_is[psum] = plen; 6351 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6352 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6353 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6354 psum += plen+1; /* indices + lenght */ 6355 } 6356 for (i=0;i<n_sends;i++) { 6357 ilengths_idxs_is[is_indices[i]] = psum; 6358 } 6359 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6360 } 6361 6362 buf_size_idxs = 0; 6363 buf_size_vals = 0; 6364 buf_size_idxs_is = 0; 6365 buf_size_vecs = 0; 6366 for (i=0;i<n_recvs;i++) { 6367 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6368 buf_size_vals += (PetscInt)olengths_vals[i]; 6369 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6370 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6371 } 6372 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6373 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6374 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6375 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6376 6377 /* get new tags for clean communications */ 6378 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6379 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6380 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6381 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6382 6383 /* allocate for requests */ 6384 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6385 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6386 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6387 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6388 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6389 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6390 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6391 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6392 6393 /* communications */ 6394 ptr_idxs = recv_buffer_idxs; 6395 ptr_vals = recv_buffer_vals; 6396 ptr_idxs_is = recv_buffer_idxs_is; 6397 ptr_vecs = recv_buffer_vecs; 6398 for (i=0;i<n_recvs;i++) { 6399 source_dest = onodes[i]; 6400 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6401 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6402 ptr_idxs += olengths_idxs[i]; 6403 ptr_vals += olengths_vals[i]; 6404 if (nis) { 6405 source_dest = onodes_is[i]; 6406 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6407 ptr_idxs_is += olengths_idxs_is[i]; 6408 } 6409 if (nvecs) { 6410 source_dest = onodes[i]; 6411 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6412 ptr_vecs += olengths_idxs[i]-2; 6413 } 6414 } 6415 for (i=0;i<n_sends;i++) { 6416 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6417 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6418 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6419 if (nis) { 6420 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6421 } 6422 if (nvecs) { 6423 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6424 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6425 } 6426 } 6427 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6428 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6429 6430 /* assemble new l2g map */ 6431 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6432 ptr_idxs = recv_buffer_idxs; 6433 new_local_rows = 0; 6434 for (i=0;i<n_recvs;i++) { 6435 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6436 ptr_idxs += olengths_idxs[i]; 6437 } 6438 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6439 ptr_idxs = recv_buffer_idxs; 6440 new_local_rows = 0; 6441 for (i=0;i<n_recvs;i++) { 6442 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6443 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6444 ptr_idxs += olengths_idxs[i]; 6445 } 6446 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6447 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6448 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6449 6450 /* infer new local matrix type from received local matrices type */ 6451 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6452 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6453 if (n_recvs) { 6454 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6455 ptr_idxs = recv_buffer_idxs; 6456 for (i=0;i<n_recvs;i++) { 6457 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6458 new_local_type_private = MATAIJ_PRIVATE; 6459 break; 6460 } 6461 ptr_idxs += olengths_idxs[i]; 6462 } 6463 switch (new_local_type_private) { 6464 case MATDENSE_PRIVATE: 6465 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6466 new_local_type = MATSEQAIJ; 6467 bs = 1; 6468 } else { /* if I receive only 1 dense matrix */ 6469 new_local_type = MATSEQDENSE; 6470 bs = 1; 6471 } 6472 break; 6473 case MATAIJ_PRIVATE: 6474 new_local_type = MATSEQAIJ; 6475 bs = 1; 6476 break; 6477 case MATBAIJ_PRIVATE: 6478 new_local_type = MATSEQBAIJ; 6479 break; 6480 case MATSBAIJ_PRIVATE: 6481 new_local_type = MATSEQSBAIJ; 6482 break; 6483 default: 6484 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6485 break; 6486 } 6487 } else { /* by default, new_local_type is seqdense */ 6488 new_local_type = MATSEQDENSE; 6489 bs = 1; 6490 } 6491 6492 /* create MATIS object if needed */ 6493 if (!reuse) { 6494 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6495 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6496 } else { 6497 /* it also destroys the local matrices */ 6498 if (*mat_n) { 6499 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6500 } else { /* this is a fake object */ 6501 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6502 } 6503 } 6504 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6505 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6506 6507 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6508 6509 /* Global to local map of received indices */ 6510 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6511 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6512 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6513 6514 /* restore attributes -> type of incoming data and its size */ 6515 buf_size_idxs = 0; 6516 for (i=0;i<n_recvs;i++) { 6517 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6518 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6519 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6520 } 6521 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6522 6523 /* set preallocation */ 6524 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6525 if (!newisdense) { 6526 PetscInt *new_local_nnz=0; 6527 6528 ptr_idxs = recv_buffer_idxs_local; 6529 if (n_recvs) { 6530 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6531 } 6532 for (i=0;i<n_recvs;i++) { 6533 PetscInt j; 6534 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6535 for (j=0;j<*(ptr_idxs+1);j++) { 6536 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6537 } 6538 } else { 6539 /* TODO */ 6540 } 6541 ptr_idxs += olengths_idxs[i]; 6542 } 6543 if (new_local_nnz) { 6544 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6545 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6546 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6547 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6548 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6549 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6550 } else { 6551 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6552 } 6553 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6554 } else { 6555 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6556 } 6557 6558 /* set values */ 6559 ptr_vals = recv_buffer_vals; 6560 ptr_idxs = recv_buffer_idxs_local; 6561 for (i=0;i<n_recvs;i++) { 6562 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6563 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6564 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6565 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6566 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6567 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6568 } else { 6569 /* TODO */ 6570 } 6571 ptr_idxs += olengths_idxs[i]; 6572 ptr_vals += olengths_vals[i]; 6573 } 6574 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6575 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6576 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6577 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6578 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6579 6580 #if 0 6581 if (!restrict_comm) { /* check */ 6582 Vec lvec,rvec; 6583 PetscReal infty_error; 6584 6585 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6586 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6587 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6588 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6589 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6590 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6591 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6592 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6593 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6594 } 6595 #endif 6596 6597 /* assemble new additional is (if any) */ 6598 if (nis) { 6599 PetscInt **temp_idxs,*count_is,j,psum; 6600 6601 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6602 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6603 ptr_idxs = recv_buffer_idxs_is; 6604 psum = 0; 6605 for (i=0;i<n_recvs;i++) { 6606 for (j=0;j<nis;j++) { 6607 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6608 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6609 psum += plen; 6610 ptr_idxs += plen+1; /* shift pointer to received data */ 6611 } 6612 } 6613 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6614 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6615 for (i=1;i<nis;i++) { 6616 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6617 } 6618 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6619 ptr_idxs = recv_buffer_idxs_is; 6620 for (i=0;i<n_recvs;i++) { 6621 for (j=0;j<nis;j++) { 6622 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6623 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6624 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6625 ptr_idxs += plen+1; /* shift pointer to received data */ 6626 } 6627 } 6628 for (i=0;i<nis;i++) { 6629 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6630 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6631 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6632 } 6633 ierr = PetscFree(count_is);CHKERRQ(ierr); 6634 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6635 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6636 } 6637 /* free workspace */ 6638 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6639 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6640 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6641 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6642 if (isdense) { 6643 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6644 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6645 } else { 6646 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6647 } 6648 if (nis) { 6649 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6650 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6651 } 6652 6653 if (nvecs) { 6654 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6655 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6656 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6657 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6658 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6659 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6660 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6661 /* set values */ 6662 ptr_vals = recv_buffer_vecs; 6663 ptr_idxs = recv_buffer_idxs_local; 6664 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6665 for (i=0;i<n_recvs;i++) { 6666 PetscInt j; 6667 for (j=0;j<*(ptr_idxs+1);j++) { 6668 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6669 } 6670 ptr_idxs += olengths_idxs[i]; 6671 ptr_vals += olengths_idxs[i]-2; 6672 } 6673 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6674 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6675 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6676 } 6677 6678 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6679 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6680 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6681 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6682 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6683 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6684 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6685 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6686 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6687 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6688 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6689 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6690 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6691 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6692 ierr = PetscFree(onodes);CHKERRQ(ierr); 6693 if (nis) { 6694 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6695 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6696 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6697 } 6698 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6699 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6700 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6701 for (i=0;i<nis;i++) { 6702 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6703 } 6704 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6705 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6706 } 6707 *mat_n = NULL; 6708 } 6709 PetscFunctionReturn(0); 6710 } 6711 6712 /* temporary hack into ksp private data structure */ 6713 #include <petsc/private/kspimpl.h> 6714 6715 #undef __FUNCT__ 6716 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6717 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6718 { 6719 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6720 PC_IS *pcis = (PC_IS*)pc->data; 6721 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6722 Mat coarsedivudotp = NULL; 6723 MatNullSpace CoarseNullSpace = NULL; 6724 ISLocalToGlobalMapping coarse_islg; 6725 IS coarse_is,*isarray; 6726 PetscInt i,im_active=-1,active_procs=-1; 6727 PetscInt nis,nisdofs,nisneu,nisvert; 6728 PC pc_temp; 6729 PCType coarse_pc_type; 6730 KSPType coarse_ksp_type; 6731 PetscBool multilevel_requested,multilevel_allowed; 6732 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6733 Mat t_coarse_mat_is; 6734 PetscInt ncoarse; 6735 PetscBool compute_vecs = PETSC_FALSE; 6736 PetscScalar *array; 6737 MatReuse coarse_mat_reuse; 6738 PetscBool restr, full_restr, have_void; 6739 PetscErrorCode ierr; 6740 6741 PetscFunctionBegin; 6742 /* Assign global numbering to coarse dofs */ 6743 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6744 PetscInt ocoarse_size; 6745 compute_vecs = PETSC_TRUE; 6746 ocoarse_size = pcbddc->coarse_size; 6747 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6748 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6749 /* see if we can avoid some work */ 6750 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6751 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6752 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6753 PC pc; 6754 PetscBool isbddc; 6755 6756 /* temporary workaround since PCBDDC does not have a reset method so far */ 6757 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6758 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6759 if (isbddc) { 6760 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6761 } else { 6762 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6763 } 6764 coarse_reuse = PETSC_FALSE; 6765 } else { /* we can safely reuse already computed coarse matrix */ 6766 coarse_reuse = PETSC_TRUE; 6767 } 6768 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6769 coarse_reuse = PETSC_FALSE; 6770 } 6771 /* reset any subassembling information */ 6772 if (!coarse_reuse || pcbddc->recompute_topography) { 6773 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6774 } 6775 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6776 coarse_reuse = PETSC_TRUE; 6777 } 6778 /* assemble coarse matrix */ 6779 if (coarse_reuse && pcbddc->coarse_ksp) { 6780 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6781 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6782 coarse_mat_reuse = MAT_REUSE_MATRIX; 6783 } else { 6784 coarse_mat = NULL; 6785 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6786 } 6787 6788 /* creates temporary l2gmap and IS for coarse indexes */ 6789 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6790 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6791 6792 /* creates temporary MATIS object for coarse matrix */ 6793 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6794 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6795 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6796 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6797 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6798 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6799 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6800 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6801 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6802 6803 /* count "active" (i.e. with positive local size) and "void" processes */ 6804 im_active = !!(pcis->n); 6805 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6806 6807 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6808 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6809 /* full_restr : just use the receivers from the subassembling pattern */ 6810 coarse_mat_is = NULL; 6811 multilevel_allowed = PETSC_FALSE; 6812 multilevel_requested = PETSC_FALSE; 6813 full_restr = PETSC_TRUE; 6814 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6815 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6816 if (multilevel_requested) { 6817 ncoarse = active_procs/pcbddc->coarsening_ratio; 6818 restr = PETSC_FALSE; 6819 full_restr = PETSC_FALSE; 6820 } else { 6821 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6822 restr = PETSC_TRUE; 6823 full_restr = PETSC_TRUE; 6824 } 6825 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6826 ncoarse = PetscMax(1,ncoarse); 6827 if (!pcbddc->coarse_subassembling) { 6828 if (pcbddc->coarsening_ratio > 1) { 6829 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6830 } else { 6831 PetscMPIInt size,rank; 6832 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6833 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6834 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6835 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6836 } 6837 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6838 PetscInt psum; 6839 PetscMPIInt size; 6840 if (pcbddc->coarse_ksp) psum = 1; 6841 else psum = 0; 6842 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6843 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6844 if (ncoarse < size) have_void = PETSC_TRUE; 6845 } 6846 /* determine if we can go multilevel */ 6847 if (multilevel_requested) { 6848 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6849 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6850 } 6851 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6852 6853 /* dump subassembling pattern */ 6854 if (pcbddc->dbg_flag && multilevel_allowed) { 6855 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6856 } 6857 6858 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6859 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6860 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6861 const PetscInt *idxs; 6862 ISLocalToGlobalMapping tmap; 6863 6864 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6865 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6866 /* allocate space for temporary storage */ 6867 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6868 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6869 /* allocate for IS array */ 6870 nisdofs = pcbddc->n_ISForDofsLocal; 6871 nisneu = !!pcbddc->NeumannBoundariesLocal; 6872 nisvert = 0; /* nisvert is not used */ 6873 nis = nisdofs + nisneu + nisvert; 6874 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6875 /* dofs splitting */ 6876 for (i=0;i<nisdofs;i++) { 6877 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6878 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6879 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6880 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6881 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6882 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6883 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6884 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6885 } 6886 /* neumann boundaries */ 6887 if (pcbddc->NeumannBoundariesLocal) { 6888 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6889 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6890 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6891 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6892 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6893 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6894 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6895 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6896 } 6897 /* free memory */ 6898 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6899 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6900 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6901 } else { 6902 nis = 0; 6903 nisdofs = 0; 6904 nisneu = 0; 6905 nisvert = 0; 6906 isarray = NULL; 6907 } 6908 /* destroy no longer needed map */ 6909 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6910 6911 /* subassemble */ 6912 if (multilevel_allowed) { 6913 Vec vp[1]; 6914 PetscInt nvecs = 0; 6915 PetscBool reuse,reuser; 6916 6917 if (coarse_mat) reuse = PETSC_TRUE; 6918 else reuse = PETSC_FALSE; 6919 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6920 vp[0] = NULL; 6921 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6922 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6923 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6924 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6925 nvecs = 1; 6926 6927 if (pcbddc->divudotp) { 6928 Mat B,loc_divudotp; 6929 Vec v,p; 6930 IS dummy; 6931 PetscInt np; 6932 6933 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6934 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6935 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6936 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6937 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6938 ierr = VecSet(p,1.);CHKERRQ(ierr); 6939 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6940 ierr = VecDestroy(&p);CHKERRQ(ierr); 6941 ierr = MatDestroy(&B);CHKERRQ(ierr); 6942 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6943 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6944 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6945 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6946 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6947 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6948 ierr = VecDestroy(&v);CHKERRQ(ierr); 6949 } 6950 } 6951 if (reuser) { 6952 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6953 } else { 6954 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6955 } 6956 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6957 PetscScalar *arraym,*arrayv; 6958 PetscInt nl; 6959 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6960 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6961 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6962 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 6963 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 6964 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 6965 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6966 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 6967 } else { 6968 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 6969 } 6970 } else { 6971 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 6972 } 6973 if (coarse_mat_is || coarse_mat) { 6974 PetscMPIInt size; 6975 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 6976 if (!multilevel_allowed) { 6977 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 6978 } else { 6979 Mat A; 6980 6981 /* if this matrix is present, it means we are not reusing the coarse matrix */ 6982 if (coarse_mat_is) { 6983 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 6984 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 6985 coarse_mat = coarse_mat_is; 6986 } 6987 /* be sure we don't have MatSeqDENSE as local mat */ 6988 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 6989 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 6990 } 6991 } 6992 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 6993 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 6994 6995 /* create local to global scatters for coarse problem */ 6996 if (compute_vecs) { 6997 PetscInt lrows; 6998 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 6999 if (coarse_mat) { 7000 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7001 } else { 7002 lrows = 0; 7003 } 7004 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7005 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7006 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7007 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7008 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7009 } 7010 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7011 7012 /* set defaults for coarse KSP and PC */ 7013 if (multilevel_allowed) { 7014 coarse_ksp_type = KSPRICHARDSON; 7015 coarse_pc_type = PCBDDC; 7016 } else { 7017 coarse_ksp_type = KSPPREONLY; 7018 coarse_pc_type = PCREDUNDANT; 7019 } 7020 7021 /* print some info if requested */ 7022 if (pcbddc->dbg_flag) { 7023 if (!multilevel_allowed) { 7024 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7025 if (multilevel_requested) { 7026 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7027 } else if (pcbddc->max_levels) { 7028 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7029 } 7030 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7031 } 7032 } 7033 7034 /* create the coarse KSP object only once with defaults */ 7035 if (coarse_mat) { 7036 PetscViewer dbg_viewer = NULL; 7037 if (pcbddc->dbg_flag) { 7038 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7039 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7040 } 7041 if (!pcbddc->coarse_ksp) { 7042 char prefix[256],str_level[16]; 7043 size_t len; 7044 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7045 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7046 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7047 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7048 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7049 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7050 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7051 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7052 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7053 /* prefix */ 7054 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7055 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7056 if (!pcbddc->current_level) { 7057 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7058 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7059 } else { 7060 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7061 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7062 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7063 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7064 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7065 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7066 } 7067 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7068 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7069 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7070 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7071 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7072 /* allow user customization */ 7073 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7074 } 7075 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7076 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7077 if (nisdofs) { 7078 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7079 for (i=0;i<nisdofs;i++) { 7080 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7081 } 7082 } 7083 if (nisneu) { 7084 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7085 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7086 } 7087 if (nisvert) { 7088 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7089 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7090 } 7091 7092 /* get some info after set from options */ 7093 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7094 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7095 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7096 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7097 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7098 isbddc = PETSC_FALSE; 7099 } 7100 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7101 if (isredundant) { 7102 KSP inner_ksp; 7103 PC inner_pc; 7104 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7105 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7106 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7107 } 7108 7109 /* parameters which miss an API */ 7110 if (isbddc) { 7111 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7112 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7113 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7114 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7115 if (pcbddc_coarse->benign_saddle_point) { 7116 Mat coarsedivudotp_is; 7117 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7118 IS row,col; 7119 const PetscInt *gidxs; 7120 PetscInt n,st,M,N; 7121 7122 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7123 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7124 st = st-n; 7125 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7126 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7127 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7128 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7129 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7130 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7131 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7132 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7133 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7134 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7135 ierr = ISDestroy(&row);CHKERRQ(ierr); 7136 ierr = ISDestroy(&col);CHKERRQ(ierr); 7137 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7138 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7139 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7140 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7141 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7142 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7143 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7144 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7145 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7146 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7147 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7148 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7149 } 7150 } 7151 7152 /* propagate symmetry info of coarse matrix */ 7153 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7154 if (pc->pmat->symmetric_set) { 7155 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7156 } 7157 if (pc->pmat->hermitian_set) { 7158 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7159 } 7160 if (pc->pmat->spd_set) { 7161 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7162 } 7163 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7164 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7165 } 7166 /* set operators */ 7167 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7168 if (pcbddc->dbg_flag) { 7169 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7170 } 7171 } 7172 ierr = PetscFree(isarray);CHKERRQ(ierr); 7173 #if 0 7174 { 7175 PetscViewer viewer; 7176 char filename[256]; 7177 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7178 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7179 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7180 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7181 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7182 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7183 } 7184 #endif 7185 7186 if (pcbddc->coarse_ksp) { 7187 Vec crhs,csol; 7188 7189 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7190 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7191 if (!csol) { 7192 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7193 } 7194 if (!crhs) { 7195 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7196 } 7197 } 7198 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7199 7200 /* compute null space for coarse solver if the benign trick has been requested */ 7201 if (pcbddc->benign_null) { 7202 7203 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7204 for (i=0;i<pcbddc->benign_n;i++) { 7205 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7206 } 7207 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7208 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7209 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7210 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7211 if (coarse_mat) { 7212 Vec nullv; 7213 PetscScalar *array,*array2; 7214 PetscInt nl; 7215 7216 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7217 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7218 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7219 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7220 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7221 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7222 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7223 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7224 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7225 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7226 } 7227 } 7228 7229 if (pcbddc->coarse_ksp) { 7230 PetscBool ispreonly; 7231 7232 if (CoarseNullSpace) { 7233 PetscBool isnull; 7234 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7235 if (isnull) { 7236 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7237 } 7238 /* TODO: add local nullspaces (if any) */ 7239 } 7240 /* setup coarse ksp */ 7241 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7242 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7243 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7244 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7245 KSP check_ksp; 7246 KSPType check_ksp_type; 7247 PC check_pc; 7248 Vec check_vec,coarse_vec; 7249 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7250 PetscInt its; 7251 PetscBool compute_eigs; 7252 PetscReal *eigs_r,*eigs_c; 7253 PetscInt neigs; 7254 const char *prefix; 7255 7256 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7257 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7258 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7259 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7260 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7261 /* prevent from setup unneeded object */ 7262 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7263 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7264 if (ispreonly) { 7265 check_ksp_type = KSPPREONLY; 7266 compute_eigs = PETSC_FALSE; 7267 } else { 7268 check_ksp_type = KSPGMRES; 7269 compute_eigs = PETSC_TRUE; 7270 } 7271 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7272 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7273 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7274 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7275 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7276 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7277 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7278 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7279 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7280 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7281 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7282 /* create random vec */ 7283 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7284 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7285 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7286 /* solve coarse problem */ 7287 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7288 /* set eigenvalue estimation if preonly has not been requested */ 7289 if (compute_eigs) { 7290 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7291 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7292 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7293 if (neigs) { 7294 lambda_max = eigs_r[neigs-1]; 7295 lambda_min = eigs_r[0]; 7296 if (pcbddc->use_coarse_estimates) { 7297 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7298 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7299 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7300 } 7301 } 7302 } 7303 } 7304 7305 /* check coarse problem residual error */ 7306 if (pcbddc->dbg_flag) { 7307 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7308 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7309 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7310 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7311 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7312 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7313 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7314 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7315 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7316 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7317 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7318 if (CoarseNullSpace) { 7319 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7320 } 7321 if (compute_eigs) { 7322 PetscReal lambda_max_s,lambda_min_s; 7323 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7324 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7325 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7326 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7327 for (i=0;i<neigs;i++) { 7328 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7329 } 7330 } 7331 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7332 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7333 } 7334 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7335 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7336 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7337 if (compute_eigs) { 7338 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7339 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7340 } 7341 } 7342 } 7343 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7344 /* print additional info */ 7345 if (pcbddc->dbg_flag) { 7346 /* waits until all processes reaches this point */ 7347 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7348 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7349 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7350 } 7351 7352 /* free memory */ 7353 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7354 PetscFunctionReturn(0); 7355 } 7356 7357 #undef __FUNCT__ 7358 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7359 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7360 { 7361 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7362 PC_IS* pcis = (PC_IS*)pc->data; 7363 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7364 IS subset,subset_mult,subset_n; 7365 PetscInt local_size,coarse_size=0; 7366 PetscInt *local_primal_indices=NULL; 7367 const PetscInt *t_local_primal_indices; 7368 PetscErrorCode ierr; 7369 7370 PetscFunctionBegin; 7371 /* Compute global number of coarse dofs */ 7372 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7373 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7374 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7375 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7376 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7377 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7378 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7379 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7380 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7381 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7382 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7383 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7384 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7385 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7386 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7387 7388 /* check numbering */ 7389 if (pcbddc->dbg_flag) { 7390 PetscScalar coarsesum,*array,*array2; 7391 PetscInt i; 7392 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7393 7394 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7395 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7397 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7398 /* counter */ 7399 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7400 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7401 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7402 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7403 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7404 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7405 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7406 for (i=0;i<pcbddc->local_primal_size;i++) { 7407 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7408 } 7409 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7410 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7411 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7412 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7413 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7414 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7415 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7416 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7417 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7418 for (i=0;i<pcis->n;i++) { 7419 if (array[i] != 0.0 && array[i] != array2[i]) { 7420 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7421 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7422 set_error = PETSC_TRUE; 7423 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7424 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7425 } 7426 } 7427 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7428 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7429 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7430 for (i=0;i<pcis->n;i++) { 7431 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7432 } 7433 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7434 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7435 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7436 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7437 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7438 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7439 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7440 PetscInt *gidxs; 7441 7442 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7443 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7444 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7445 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7446 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7447 for (i=0;i<pcbddc->local_primal_size;i++) { 7448 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7449 } 7450 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7451 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7452 } 7453 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7454 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7455 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7456 } 7457 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7458 /* get back data */ 7459 *coarse_size_n = coarse_size; 7460 *local_primal_indices_n = local_primal_indices; 7461 PetscFunctionReturn(0); 7462 } 7463 7464 #undef __FUNCT__ 7465 #define __FUNCT__ "PCBDDCGlobalToLocal" 7466 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7467 { 7468 IS localis_t; 7469 PetscInt i,lsize,*idxs,n; 7470 PetscScalar *vals; 7471 PetscErrorCode ierr; 7472 7473 PetscFunctionBegin; 7474 /* get indices in local ordering exploiting local to global map */ 7475 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7476 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7477 for (i=0;i<lsize;i++) vals[i] = 1.0; 7478 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7479 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7480 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7481 if (idxs) { /* multilevel guard */ 7482 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7483 } 7484 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7485 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7486 ierr = PetscFree(vals);CHKERRQ(ierr); 7487 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7488 /* now compute set in local ordering */ 7489 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7490 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7491 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7492 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7493 for (i=0,lsize=0;i<n;i++) { 7494 if (PetscRealPart(vals[i]) > 0.5) { 7495 lsize++; 7496 } 7497 } 7498 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7499 for (i=0,lsize=0;i<n;i++) { 7500 if (PetscRealPart(vals[i]) > 0.5) { 7501 idxs[lsize++] = i; 7502 } 7503 } 7504 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7505 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7506 *localis = localis_t; 7507 PetscFunctionReturn(0); 7508 } 7509 7510 #undef __FUNCT__ 7511 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7512 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7513 { 7514 PC_IS *pcis=(PC_IS*)pc->data; 7515 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7516 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7517 Mat S_j; 7518 PetscInt *used_xadj,*used_adjncy; 7519 PetscBool free_used_adj; 7520 PetscErrorCode ierr; 7521 7522 PetscFunctionBegin; 7523 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7524 free_used_adj = PETSC_FALSE; 7525 if (pcbddc->sub_schurs_layers == -1) { 7526 used_xadj = NULL; 7527 used_adjncy = NULL; 7528 } else { 7529 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7530 used_xadj = pcbddc->mat_graph->xadj; 7531 used_adjncy = pcbddc->mat_graph->adjncy; 7532 } else if (pcbddc->computed_rowadj) { 7533 used_xadj = pcbddc->mat_graph->xadj; 7534 used_adjncy = pcbddc->mat_graph->adjncy; 7535 } else { 7536 PetscBool flg_row=PETSC_FALSE; 7537 const PetscInt *xadj,*adjncy; 7538 PetscInt nvtxs; 7539 7540 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7541 if (flg_row) { 7542 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7543 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7544 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7545 free_used_adj = PETSC_TRUE; 7546 } else { 7547 pcbddc->sub_schurs_layers = -1; 7548 used_xadj = NULL; 7549 used_adjncy = NULL; 7550 } 7551 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7552 } 7553 } 7554 7555 /* setup sub_schurs data */ 7556 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7557 if (!sub_schurs->schur_explicit) { 7558 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7559 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7560 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7561 } else { 7562 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7563 PetscBool isseqaij,need_change = PETSC_FALSE; 7564 PetscInt benign_n; 7565 Mat change = NULL; 7566 Vec scaling = NULL; 7567 IS change_primal = NULL; 7568 7569 if (!pcbddc->use_vertices && reuse_solvers) { 7570 PetscInt n_vertices; 7571 7572 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7573 reuse_solvers = (PetscBool)!n_vertices; 7574 } 7575 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7576 if (!isseqaij) { 7577 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7578 if (matis->A == pcbddc->local_mat) { 7579 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7580 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7581 } else { 7582 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7583 } 7584 } 7585 if (!pcbddc->benign_change_explicit) { 7586 benign_n = pcbddc->benign_n; 7587 } else { 7588 benign_n = 0; 7589 } 7590 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7591 We need a global reduction to avoid possible deadlocks. 7592 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7593 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7594 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7595 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7596 need_change = (PetscBool)(!need_change); 7597 } 7598 /* If the user defines additional constraints, we import them here. 7599 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7600 if (need_change) { 7601 PC_IS *pcisf; 7602 PC_BDDC *pcbddcf; 7603 PC pcf; 7604 7605 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7606 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7607 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7608 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7609 /* hacks */ 7610 pcisf = (PC_IS*)pcf->data; 7611 pcisf->is_B_local = pcis->is_B_local; 7612 pcisf->vec1_N = pcis->vec1_N; 7613 pcisf->BtoNmap = pcis->BtoNmap; 7614 pcisf->n = pcis->n; 7615 pcisf->n_B = pcis->n_B; 7616 pcbddcf = (PC_BDDC*)pcf->data; 7617 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7618 pcbddcf->mat_graph = pcbddc->mat_graph; 7619 pcbddcf->use_faces = PETSC_TRUE; 7620 pcbddcf->use_change_of_basis = PETSC_TRUE; 7621 pcbddcf->use_change_on_faces = PETSC_TRUE; 7622 pcbddcf->use_qr_single = PETSC_TRUE; 7623 pcbddcf->fake_change = PETSC_TRUE; 7624 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7625 /* store information on primal vertices and change of basis (in local numbering) */ 7626 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7627 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7628 change = pcbddcf->ConstraintMatrix; 7629 pcbddcf->ConstraintMatrix = NULL; 7630 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7631 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7632 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7633 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7634 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7635 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7636 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7637 pcf->ops->destroy = NULL; 7638 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7639 } 7640 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7641 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7642 ierr = MatDestroy(&change);CHKERRQ(ierr); 7643 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7644 } 7645 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7646 7647 /* free adjacency */ 7648 if (free_used_adj) { 7649 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7650 } 7651 PetscFunctionReturn(0); 7652 } 7653 7654 #undef __FUNCT__ 7655 #define __FUNCT__ "PCBDDCInitSubSchurs" 7656 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7657 { 7658 PC_IS *pcis=(PC_IS*)pc->data; 7659 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7660 PCBDDCGraph graph; 7661 PetscErrorCode ierr; 7662 7663 PetscFunctionBegin; 7664 /* attach interface graph for determining subsets */ 7665 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7666 IS verticesIS,verticescomm; 7667 PetscInt vsize,*idxs; 7668 7669 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7670 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7671 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7672 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7673 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7674 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7675 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7676 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7677 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7678 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7679 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7680 } else { 7681 graph = pcbddc->mat_graph; 7682 } 7683 /* print some info */ 7684 if (pcbddc->dbg_flag) { 7685 IS vertices; 7686 PetscInt nv,nedges,nfaces; 7687 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7688 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7689 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7690 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7696 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7697 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7698 } 7699 7700 /* sub_schurs init */ 7701 if (!pcbddc->sub_schurs) { 7702 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7703 } 7704 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7705 7706 /* free graph struct */ 7707 if (pcbddc->sub_schurs_rebuild) { 7708 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7709 } 7710 PetscFunctionReturn(0); 7711 } 7712 7713 #undef __FUNCT__ 7714 #define __FUNCT__ "PCBDDCCheckOperator" 7715 PetscErrorCode PCBDDCCheckOperator(PC pc) 7716 { 7717 PC_IS *pcis=(PC_IS*)pc->data; 7718 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7719 PetscErrorCode ierr; 7720 7721 PetscFunctionBegin; 7722 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7723 IS zerodiag = NULL; 7724 Mat S_j,B0_B=NULL; 7725 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7726 PetscScalar *p0_check,*array,*array2; 7727 PetscReal norm; 7728 PetscInt i; 7729 7730 /* B0 and B0_B */ 7731 if (zerodiag) { 7732 IS dummy; 7733 7734 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7735 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7736 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7737 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7738 } 7739 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7740 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7741 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7742 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7743 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7744 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7745 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7746 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7747 /* S_j */ 7748 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7749 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7750 7751 /* mimic vector in \widetilde{W}_\Gamma */ 7752 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7753 /* continuous in primal space */ 7754 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7755 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7756 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7757 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7758 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7759 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7760 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7761 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7762 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7763 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7764 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7765 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7766 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7767 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7768 7769 /* assemble rhs for coarse problem */ 7770 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7771 /* local with Schur */ 7772 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7773 if (zerodiag) { 7774 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7775 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7776 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7777 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7778 } 7779 /* sum on primal nodes the local contributions */ 7780 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7781 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7782 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7783 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7784 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7785 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7786 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7787 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7788 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7789 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7790 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7791 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7792 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7793 /* scale primal nodes (BDDC sums contibutions) */ 7794 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7795 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7796 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7797 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7798 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7799 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7800 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7801 /* global: \widetilde{B0}_B w_\Gamma */ 7802 if (zerodiag) { 7803 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7804 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7805 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7806 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7807 } 7808 /* BDDC */ 7809 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7810 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7811 7812 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7813 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7814 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7815 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7816 for (i=0;i<pcbddc->benign_n;i++) { 7817 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7818 } 7819 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7820 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7821 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7822 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7823 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7824 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7825 } 7826 PetscFunctionReturn(0); 7827 } 7828