1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignCheck" 10 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PC_IS* pcis = (PC_IS*)(pc->data); 14 IS dirIS = NULL; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 19 if (zerodiag) { 20 Mat A; 21 Vec vec3_N; 22 PetscScalar *vals; 23 const PetscInt *idxs; 24 PetscInt i,nz; 25 26 /* p0 */ 27 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 28 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 29 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 30 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 31 for (i=0;i<nz;i++) vals[i] = 1.; /* TODO add quadrature */ 32 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 33 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 34 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 35 /* v_I */ 36 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 37 for (i=0;i<nz;i++) vals[i] = 0.; 38 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 39 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 40 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 41 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 42 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 43 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 44 if (dirIS) { 45 PetscInt n; 46 47 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 48 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 49 for (i=0;i<n;i++) vals[i] = 0.; 50 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 51 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 54 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 55 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 56 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 57 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 58 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 59 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 60 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { /* TODO: should I add the A-norm in test? */ 61 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 62 } 63 ierr = PetscFree(vals);CHKERRQ(ierr); 64 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 65 } 66 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 67 68 /* check PCBDDCBenignGetOrSetP0 */ 69 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 70 pcbddc->benign_p0 = -PetscGlobalRank; 71 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 72 pcbddc->benign_p0 = 1; 73 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 74 if (pcbddc->benign_p0_gidx >=0 && pcbddc->benign_p0 != -PetscGlobalRank) { 75 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e instead of %1.4e\n",pcbddc->benign_p0,-PetscGlobalRank);CHKERRQ(ierr); 76 } 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 82 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 83 { 84 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 85 IS pressures,zerodiag; 86 PetscInt nz; 87 PetscBool sorted; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 92 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 93 pcbddc->benign_original_mat = pcbddc->local_mat; 94 /* if a local info on dofs is present, assumes the last field is represented by "pressures", otherwise, uses only zerodiagonal dofs (ok if the pressure block is all zero) */ 95 if (pcbddc->n_ISForDofsLocal) { 96 PetscInt p = pcbddc->n_ISForDofsLocal-1; 97 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[p]);CHKERRQ(ierr); 98 pressures = pcbddc->ISForDofsLocal[p]; 99 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 100 if (!sorted) { 101 ierr = ISSort(pressures);CHKERRQ(ierr); 102 } 103 } else { 104 pressures = NULL; 105 } 106 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 107 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 108 if (!sorted) { 109 ierr = ISSort(zerodiag);CHKERRQ(ierr); 110 } 111 /* 112 Check if all the pressure dofs have a zero diagonal 113 If not, a change of basis on pressures is not needed 114 since the local Schur complements are SPD 115 */ 116 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 117 if (pressures) { 118 PetscInt np; 119 PetscBool sameis; 120 121 sameis = PETSC_FALSE; 122 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 123 if (nz == np) { 124 const PetscInt *idxs,*pidxs; 125 126 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 127 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 128 ierr = PetscMemcmp(pidxs,idxs,np*sizeof(PetscInt),&sameis);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 130 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 131 } 132 if (!sameis) { /* destroy index sets and set nz to 0 to avoid next code branch */ 133 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 134 nz = 0; 135 } 136 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 137 } 138 if (nz) { 139 IS zerodiagc; 140 PetscScalar *array; 141 const PetscInt *idxs,*idxsc; 142 PetscInt i,n,*nnz; 143 144 /* TODO: add check for shared dofs and raise error */ 145 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 146 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 147 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 148 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 149 /* local change of basis for pressures */ 150 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 151 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 152 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 153 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 154 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 155 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities */ 156 for (i=0;i<nz-1;i++) nnz[idxs[i]] = 2; /* change on pressures */ 157 nnz[idxs[nz-1]] = nz; /* last local pressure dof: _0 set */ 158 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 159 ierr = PetscFree(nnz);CHKERRQ(ierr); 160 /* set identity on velocities */ 161 for (i=0;i<n-nz;i++) { 162 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 163 } 164 /* set change on pressures */ 165 for (i=0;i<nz-1;i++) { 166 PetscScalar vals[2]; 167 PetscInt cols[2]; 168 169 /* TODO: add quadrature */ 170 cols[0] = idxs[i]; 171 cols[1] = idxs[nz-1]; 172 vals[0] = 1.; 173 vals[1] = 1./nz; 174 ierr = MatSetValues(pcbddc->benign_change,1,idxs+i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 175 } 176 ierr = PetscMalloc1(nz,&array);CHKERRQ(ierr); 177 for (i=0;i<nz-1;i++) array[i] = -1.; 178 array[nz-1] = 1./nz; 179 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nz-1,nz,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 180 ierr = PetscFree(array);CHKERRQ(ierr); 181 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 182 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 183 /* TODO: need optimization? */ 184 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 185 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 186 /* store local and global idxs for p0 */ 187 pcbddc->benign_p0_lidx = idxs[nz-1]; 188 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,1,&idxs[nz-1],&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 189 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 190 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 191 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 192 } 193 *zerodiaglocal = zerodiag; 194 PetscFunctionReturn(0); 195 } 196 197 #undef __FUNCT__ 198 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 199 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 200 { 201 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 202 PetscErrorCode ierr; 203 204 PetscFunctionBegin; 205 if (!pcbddc->benign_sf) { 206 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 207 if (pcbddc->benign_p0_gidx >= 0) { 208 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 209 } else { 210 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 211 } 212 } 213 if (get) { /* use SF to get values */ 214 PetscScalar *array; 215 216 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 217 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 218 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 219 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 220 } else { /* use VecSetValue */ 221 if (pcbddc->benign_p0_gidx >= 0) { 222 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 223 } 224 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 225 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 226 } 227 PetscFunctionReturn(0); 228 } 229 230 #undef __FUNCT__ 231 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 232 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 233 { 234 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 /* TODO: add error checking 239 - avoid nested pop (or push) calls. 240 - cannot push before pop. 241 - cannot call this if pcbddc->local_mat is NULL 242 */ 243 if (pcbddc->benign_p0_lidx < 0) { 244 PetscFunctionReturn(0); 245 } 246 if (pop) { 247 const PetscInt *cB0_cols; 248 PetscInt cB0_ncol; 249 const PetscScalar *cB0_vals; 250 251 /* extract B_0 */ 252 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 253 pcbddc->B0_ncol = cB0_ncol; 254 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 255 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 256 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 257 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 258 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 259 /* remove rows and cols from local problem */ 260 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 261 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 262 } else { /* push */ 263 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 264 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 265 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 266 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 267 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 268 } 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "PCBDDCAdaptiveSelection" 274 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 275 { 276 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 277 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 278 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 279 PetscBLASInt *B_iwork,*B_ifail; 280 PetscScalar *work,lwork; 281 PetscScalar *St,*S,*eigv; 282 PetscScalar *Sarray,*Starray; 283 PetscReal *eigs,thresh; 284 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 285 PetscBool allocated_S_St; 286 #if defined(PETSC_USE_COMPLEX) 287 PetscReal *rwork; 288 #endif 289 PetscErrorCode ierr; 290 291 PetscFunctionBegin; 292 if (!sub_schurs->use_mumps) { 293 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 294 } 295 296 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 297 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 298 } 299 300 if (pcbddc->dbg_flag) { 301 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 302 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 303 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 304 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 305 } 306 307 if (pcbddc->dbg_flag) { 308 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 309 } 310 311 /* max size of subsets */ 312 mss = 0; 313 for (i=0;i<sub_schurs->n_subs;i++) { 314 PetscInt subset_size; 315 316 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 317 mss = PetscMax(mss,subset_size); 318 } 319 320 /* min/max and threshold */ 321 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 322 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 323 nmax = PetscMax(nmin,nmax); 324 allocated_S_St = PETSC_FALSE; 325 if (nmin) { 326 allocated_S_St = PETSC_TRUE; 327 } 328 329 /* allocate lapack workspace */ 330 cum = cum2 = 0; 331 maxneigs = 0; 332 for (i=0;i<sub_schurs->n_subs;i++) { 333 PetscInt n,subset_size; 334 335 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 336 n = PetscMin(subset_size,nmax); 337 cum += subset_size; 338 cum2 += subset_size*n; 339 maxneigs = PetscMax(maxneigs,n); 340 } 341 if (mss) { 342 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 343 PetscBLASInt B_itype = 1; 344 PetscBLASInt B_N = mss; 345 PetscReal zero = 0.0; 346 PetscReal eps = 0.0; /* dlamch? */ 347 348 B_lwork = -1; 349 S = NULL; 350 St = NULL; 351 eigs = NULL; 352 eigv = NULL; 353 B_iwork = NULL; 354 B_ifail = NULL; 355 #if defined(PETSC_USE_COMPLEX) 356 rwork = NULL; 357 #endif 358 thresh = 1.0; 359 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 360 #if defined(PETSC_USE_COMPLEX) 361 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 362 #else 363 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 364 #endif 365 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 366 ierr = PetscFPTrapPop();CHKERRQ(ierr); 367 } else { 368 /* TODO */ 369 } 370 } else { 371 lwork = 0; 372 } 373 374 nv = 0; 375 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 376 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 377 } 378 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 379 if (allocated_S_St) { 380 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 381 } 382 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 383 #if defined(PETSC_USE_COMPLEX) 384 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 385 #endif 386 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 387 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 388 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 389 nv+cum,&pcbddc->adaptive_constraints_idxs, 390 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 391 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 392 393 maxneigs = 0; 394 cum = cum2 = cumarray = 0; 395 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 396 pcbddc->adaptive_constraints_data_ptr[0] = 0; 397 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 398 const PetscInt *idxs; 399 400 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 401 for (cum=0;cum<nv;cum++) { 402 pcbddc->adaptive_constraints_n[cum] = 1; 403 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 404 pcbddc->adaptive_constraints_data[cum] = 1.0; 405 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 406 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 407 } 408 cum2 = cum; 409 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 410 } 411 412 if (mss) { /* multilevel */ 413 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 414 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 415 } 416 417 for (i=0;i<sub_schurs->n_subs;i++) { 418 419 const PetscInt *idxs; 420 PetscReal infty = PETSC_MAX_REAL; 421 PetscInt j,subset_size,eigs_start = 0; 422 PetscBLASInt B_N; 423 PetscBool same_data = PETSC_FALSE; 424 425 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 426 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 427 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 428 if (sub_schurs->is_hermitian) { 429 PetscInt j,k; 430 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 431 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 432 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 433 } 434 for (j=0;j<subset_size;j++) { 435 for (k=j;k<subset_size;k++) { 436 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 437 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 438 } 439 } 440 } else { 441 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 442 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 443 } 444 } else { 445 S = Sarray + cumarray; 446 St = Starray + cumarray; 447 } 448 449 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 450 /* see if we can save some work */ 451 if (sub_schurs->n_subs == 1) { 452 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 453 } 454 455 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 456 B_neigs = 0; 457 } else { 458 /* Threshold: this is an heuristic for edges */ 459 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 460 461 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 462 PetscBLASInt B_itype = 1; 463 PetscBLASInt B_IL, B_IU; 464 PetscReal eps = -1.0; /* dlamch? */ 465 PetscInt nmin_s; 466 467 if (pcbddc->dbg_flag) { 468 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 469 } 470 471 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 472 if (thresh > 1.+PETSC_SMALL) { 473 474 /* ask for eigenvalues larger than thresh */ 475 #if defined(PETSC_USE_COMPLEX) 476 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 477 #else 478 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 479 #endif 480 } else { 481 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 482 B_IL = 1; 483 #if defined(PETSC_USE_COMPLEX) 484 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 485 #else 486 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 487 #endif 488 } 489 ierr = PetscFPTrapPop();CHKERRQ(ierr); 490 if (B_ierr) { 491 if (B_ierr < 0 ) { 492 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 493 } else if (B_ierr <= B_N) { 494 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 495 } else { 496 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 497 } 498 } 499 500 if (B_neigs > nmax) { 501 if (pcbddc->dbg_flag) { 502 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 503 } 504 eigs_start = B_neigs -nmax; 505 B_neigs = nmax; 506 } 507 508 nmin_s = PetscMin(nmin,B_N); 509 if (B_neigs < nmin_s) { 510 PetscBLASInt B_neigs2; 511 512 B_IU = B_N - B_neigs; 513 B_IL = B_N - nmin_s + 1; 514 if (pcbddc->dbg_flag) { 515 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 516 } 517 if (sub_schurs->is_hermitian) { 518 PetscInt j; 519 for (j=0;j<subset_size;j++) { 520 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 for (j=0;j<subset_size;j++) { 523 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 524 } 525 } else { 526 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 527 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 528 } 529 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 530 #if defined(PETSC_USE_COMPLEX) 531 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 532 #else 533 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 534 #endif 535 ierr = PetscFPTrapPop();CHKERRQ(ierr); 536 B_neigs += B_neigs2; 537 } 538 if (B_ierr) { 539 if (B_ierr < 0 ) { 540 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 541 } else if (B_ierr <= B_N) { 542 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 543 } else { 544 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 545 } 546 } 547 if (pcbddc->dbg_flag) { 548 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 549 for (j=0;j<B_neigs;j++) { 550 if (eigs[j] == 0.0) { 551 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 552 } else { 553 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 554 } 555 } 556 } 557 } else { 558 /* TODO */ 559 } 560 } 561 maxneigs = PetscMax(B_neigs,maxneigs); 562 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 563 if (B_neigs) { 564 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 565 566 if (pcbddc->dbg_flag > 1) { 567 PetscInt ii; 568 for (ii=0;ii<B_neigs;ii++) { 569 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 570 for (j=0;j<B_N;j++) { 571 #if defined(PETSC_USE_COMPLEX) 572 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 573 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 575 #else 576 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 577 #endif 578 } 579 } 580 } 581 #if 0 582 for (j=0;j<B_neigs;j++) { 583 PetscBLASInt Blas_N,Blas_one = 1.0; 584 PetscScalar norm; 585 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 586 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 587 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 588 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 589 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 590 } else { 591 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 592 } 593 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 594 } 595 #endif 596 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 597 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 598 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 599 cum++; 600 } 601 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 602 /* shift for next computation */ 603 cumarray += subset_size*subset_size; 604 } 605 if (pcbddc->dbg_flag) { 606 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 607 } 608 609 if (mss) { 610 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 611 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 612 /* destroy matrices (junk) */ 613 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 614 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 615 } 616 if (allocated_S_St) { 617 ierr = PetscFree2(S,St);CHKERRQ(ierr); 618 } 619 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 620 #if defined(PETSC_USE_COMPLEX) 621 ierr = PetscFree(rwork);CHKERRQ(ierr); 622 #endif 623 if (pcbddc->dbg_flag) { 624 PetscInt maxneigs_r; 625 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 626 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 627 } 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "PCBDDCSetUpSolvers" 633 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 634 { 635 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 636 PetscScalar *coarse_submat_vals; 637 PetscErrorCode ierr; 638 639 PetscFunctionBegin; 640 /* Setup local scatters R_to_B and (optionally) R_to_D */ 641 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 642 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 643 644 /* Setup local neumann solver ksp_R */ 645 /* PCBDDCSetUpLocalScatters should be called first! */ 646 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 647 648 /* Change global null space passed in by the user if change of basis has been requested */ 649 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 650 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 651 } 652 653 /* 654 Setup local correction and local part of coarse basis. 655 Gives back the dense local part of the coarse matrix in column major ordering 656 */ 657 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 658 659 /* Compute total number of coarse nodes and setup coarse solver */ 660 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 661 662 /* free */ 663 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "PCBDDCResetCustomization" 669 PetscErrorCode PCBDDCResetCustomization(PC pc) 670 { 671 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 672 PetscErrorCode ierr; 673 674 PetscFunctionBegin; 675 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 676 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 677 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 678 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 679 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 680 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 681 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 682 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 683 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 684 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 685 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "PCBDDCResetTopography" 691 PetscErrorCode PCBDDCResetTopography(PC pc) 692 { 693 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 694 PetscErrorCode ierr; 695 696 PetscFunctionBegin; 697 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 700 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 701 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } 704 705 #undef __FUNCT__ 706 #define __FUNCT__ "PCBDDCResetSolvers" 707 PetscErrorCode PCBDDCResetSolvers(PC pc) 708 { 709 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 710 PetscScalar *array; 711 PetscErrorCode ierr; 712 713 PetscFunctionBegin; 714 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 715 if (pcbddc->coarse_phi_B) { 716 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 717 ierr = PetscFree(array);CHKERRQ(ierr); 718 } 719 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 720 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 722 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 723 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 724 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 725 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 727 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 728 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 729 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 730 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 731 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 732 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 733 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 734 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 735 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 736 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 737 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 738 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 739 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 740 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 741 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 742 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 743 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 744 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 745 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 746 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 752 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 753 { 754 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 755 PC_IS *pcis = (PC_IS*)pc->data; 756 VecType impVecType; 757 PetscInt n_constraints,n_R,old_size,n_benign; 758 PetscErrorCode ierr; 759 760 PetscFunctionBegin; 761 if (!pcbddc->ConstraintMatrix) { 762 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 763 } 764 /* get sizes */ 765 n_benign = 0; 766 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 767 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 768 n_R = pcis->n - pcbddc->n_vertices; 769 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 770 /* local work vectors (try to avoid unneeded work)*/ 771 /* R nodes */ 772 old_size = -1; 773 if (pcbddc->vec1_R) { 774 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 775 } 776 if (n_R != old_size) { 777 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 778 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 779 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 780 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 781 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 782 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 783 } 784 /* local primal dofs */ 785 old_size = -1; 786 if (pcbddc->vec1_P) { 787 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 788 } 789 if (pcbddc->local_primal_size != old_size) { 790 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 791 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 792 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 793 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 794 } 795 /* local explicit constraints */ 796 old_size = -1; 797 if (pcbddc->vec1_C) { 798 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 799 } 800 if (n_constraints && n_constraints != old_size) { 801 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 802 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 803 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 804 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 805 } 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "PCBDDCSetUpCorrection" 811 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 812 { 813 PetscErrorCode ierr; 814 /* pointers to pcis and pcbddc */ 815 PC_IS* pcis = (PC_IS*)pc->data; 816 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 817 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 818 /* submatrices of local problem */ 819 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 820 /* submatrices of benign trick */ 821 Mat B0_V = NULL; 822 /* submatrices of local coarse problem */ 823 Mat S_VV,S_CV,S_VC,S_CC; 824 /* working matrices */ 825 Mat C_CR; 826 /* additional working stuff */ 827 PC pc_R; 828 Mat F,B0 = NULL; 829 PetscBool isLU,isCHOL,isILU; 830 831 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 832 PetscScalar *work; 833 PetscInt *idx_V_B; 834 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 835 PetscInt i,n_R,n_D,n_B; 836 PetscBool unsymmetric_check; 837 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 838 MatType impMatType; 839 /* some shortcuts to scalars */ 840 PetscScalar one=1.0,m_one=-1.0; 841 842 PetscFunctionBegin; 843 n_benign = 0; 844 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 845 n_vertices = pcbddc->n_vertices; 846 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 847 /* Set Non-overlapping dimensions */ 848 n_B = pcis->n_B; 849 n_D = pcis->n - n_B; 850 n_R = pcis->n - n_vertices; 851 852 /* Set types for local objects needed by BDDC precondtioner */ 853 impMatType = MATSEQDENSE; 854 855 /* vertices in boundary numbering */ 856 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 857 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 858 if (i != n_vertices) { 859 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 860 } 861 862 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 863 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 864 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 865 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 866 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 867 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 868 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 869 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 870 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 871 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 872 873 unsymmetric_check = PETSC_FALSE; 874 /* allocate workspace */ 875 n = 0; 876 if (n_constraints) { 877 n += n_R*n_constraints; 878 } 879 if (n_vertices) { 880 n = PetscMax(2*n_R*n_vertices,n); 881 n = PetscMax((n_R+n_B)*n_vertices,n); 882 } 883 if (!pcbddc->symmetric_primal) { 884 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 885 unsymmetric_check = PETSC_TRUE; 886 } 887 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 888 889 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 890 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 891 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 892 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 893 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 894 if (isLU || isILU || isCHOL) { 895 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 896 } else if (sub_schurs->reuse_mumps) { 897 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 898 MatFactorType type; 899 900 F = reuse_mumps->F; 901 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 902 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 903 } else { 904 F = NULL; 905 } 906 907 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 908 if (n_constraints) { 909 Mat M1,M2,M3; 910 Mat auxmat; 911 IS is_aux; 912 PetscScalar *array,*array2; 913 914 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 915 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 916 917 /* Extract constraints on R nodes: C_{CR} */ 918 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 919 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 920 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 921 922 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 923 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 924 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 925 for (i=0;i<n_constraints;i++) { 926 const PetscScalar *row_cmat_values; 927 const PetscInt *row_cmat_indices; 928 PetscInt size_of_constraint,j; 929 930 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 931 for (j=0;j<size_of_constraint;j++) { 932 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 933 } 934 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 935 } 936 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 937 if (F) { 938 Mat B; 939 940 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 941 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 942 ierr = MatDestroy(&B);CHKERRQ(ierr); 943 } else { 944 PetscScalar *marr; 945 946 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 947 for (i=0;i<n_constraints;i++) { 948 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 949 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 950 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 951 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 952 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 953 } 954 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 955 } 956 if (!pcbddc->switch_static) { 957 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 958 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 959 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 960 for (i=0;i<n_constraints;i++) { 961 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 962 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 963 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 964 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 966 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 967 } 968 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 969 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 970 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 971 } else { 972 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 973 pcbddc->local_auxmat2 = local_auxmat2_R; 974 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 975 } 976 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 977 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 978 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 979 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 980 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 981 if (isCHOL) { 982 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 983 } else { 984 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 985 } 986 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 987 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 988 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 989 ierr = MatDestroy(&M2);CHKERRQ(ierr); 990 ierr = MatDestroy(&M3);CHKERRQ(ierr); 991 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 992 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 993 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 994 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 995 ierr = MatDestroy(&M1);CHKERRQ(ierr); 996 } 997 /* Get submatrices from subdomain matrix */ 998 if (n_benign) { 999 IS dummy; 1000 Mat B0_R; 1001 PetscReal norm; 1002 PetscInt ii[2]; 1003 1004 ii[0] = 0; 1005 ii[1] = pcbddc->B0_ncol; 1006 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 1007 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 1008 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1009 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1010 if (norm > PETSC_SMALL) { 1011 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1012 } 1013 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1014 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1015 } 1016 1017 if (n_vertices) { 1018 IS is_aux; 1019 1020 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1021 IS tis; 1022 1023 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1024 ierr = ISSort(tis);CHKERRQ(ierr); 1025 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1026 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1027 } else { 1028 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1029 } 1030 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1031 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1032 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1033 if (n_benign) { 1034 IS dummy; 1035 1036 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 1037 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1038 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1039 } 1040 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1041 } 1042 1043 /* Matrix of coarse basis functions (local) */ 1044 if (pcbddc->coarse_phi_B) { 1045 PetscInt on_B,on_primal,on_D=n_D; 1046 if (pcbddc->coarse_phi_D) { 1047 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1048 } 1049 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1050 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1051 PetscScalar *marray; 1052 1053 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1054 ierr = PetscFree(marray);CHKERRQ(ierr); 1055 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1056 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1057 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1058 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1059 } 1060 } 1061 1062 if (!pcbddc->coarse_phi_B) { 1063 PetscScalar *marray; 1064 1065 n = n_B*pcbddc->local_primal_size; 1066 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1067 n += n_D*pcbddc->local_primal_size; 1068 } 1069 if (!pcbddc->symmetric_primal) { 1070 n *= 2; 1071 } 1072 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1073 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1074 n = n_B*pcbddc->local_primal_size; 1075 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1076 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1077 n += n_D*pcbddc->local_primal_size; 1078 } 1079 if (!pcbddc->symmetric_primal) { 1080 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1081 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1082 n = n_B*pcbddc->local_primal_size; 1083 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1084 } 1085 } else { 1086 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1087 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1088 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1089 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1090 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1091 } 1092 } 1093 } 1094 1095 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1096 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1097 const PetscInt *idxs; 1098 1099 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1100 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 1101 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1102 } 1103 1104 /* vertices */ 1105 if (n_vertices) { 1106 1107 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1108 1109 if (n_R) { 1110 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1111 PetscBLASInt B_N,B_one = 1; 1112 PetscScalar *x,*y; 1113 PetscBool isseqaij; 1114 1115 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1116 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1117 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1118 if (F) { /* TODO could be optimized for symmetric problems */ 1119 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1120 } else { 1121 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1122 for (i=0;i<n_vertices;i++) { 1123 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1124 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1125 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1126 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1127 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1128 } 1129 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1130 } 1131 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1132 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1133 if (n_constraints) { 1134 Mat B; 1135 1136 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1137 for (i=0;i<n_vertices;i++) { 1138 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1139 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1140 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1143 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1144 } 1145 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1146 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1147 ierr = MatDestroy(&B);CHKERRQ(ierr); 1148 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1149 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1150 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1151 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1152 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1153 ierr = MatDestroy(&B);CHKERRQ(ierr); 1154 } 1155 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1156 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1157 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1158 } 1159 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1160 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1161 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1162 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1163 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1164 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1165 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1166 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1167 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1168 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1169 } else { 1170 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1171 } 1172 if (n_benign) { 1173 const PetscScalar *vals; 1174 const PetscInt *idxs; 1175 PetscInt n,j; 1176 1177 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1178 for (j=0;j<n;j++) { 1179 coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1180 coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j]; 1181 } 1182 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1183 } 1184 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1185 1186 /* coarse basis functions */ 1187 for (i=0;i<n_vertices;i++) { 1188 PetscScalar *y; 1189 1190 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1191 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1192 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1193 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1194 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1195 y[n_B*i+idx_V_B[i]] = 1.0; 1196 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1197 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1198 1199 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1200 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1201 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1202 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1203 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1204 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1205 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1206 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1207 } 1208 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1209 } 1210 /* if n_R == 0 the object is not destroyed */ 1211 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1212 } 1213 1214 if (n_constraints) { 1215 Mat B; 1216 1217 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1218 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1219 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1220 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1221 if (n_vertices) { 1222 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1223 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1224 } else { 1225 Mat S_VCt; 1226 1227 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1228 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1229 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1230 } 1231 } 1232 ierr = MatDestroy(&B);CHKERRQ(ierr); 1233 /* coarse basis functions */ 1234 for (i=0;i<n_constraints;i++) { 1235 PetscScalar *y; 1236 1237 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1238 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1239 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1240 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1241 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1242 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1243 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1244 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1245 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1246 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1247 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1248 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1250 if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0; 1251 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1252 } 1253 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1254 } 1255 } 1256 if (n_constraints) { 1257 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1258 } 1259 1260 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1261 1262 /* compute other basis functions for non-symmetric problems */ 1263 if (!pcbddc->symmetric_primal) { 1264 1265 if (n_constraints) { 1266 Mat S_CCT,B_C; 1267 1268 /* this is a lazy thing */ 1269 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1270 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1271 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1272 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1273 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1274 if (n_vertices) { 1275 Mat B_V,S_VCT; 1276 1277 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1278 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1279 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1280 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1281 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1282 } 1283 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1284 } else { /* if there are no constraints, reset work */ 1285 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1286 } 1287 if (n_vertices && n_R) { 1288 Mat A_VRT; 1289 PetscScalar *marray; 1290 PetscBLASInt B_N,B_one = 1; 1291 1292 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1293 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1294 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1295 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1296 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1297 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1298 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1299 } 1300 1301 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1302 for (i=0;i<pcbddc->local_primal_size;i++) { 1303 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1304 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1305 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1306 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1307 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1308 } 1309 } else { 1310 for (i=0;i<pcbddc->local_primal_size;i++) { 1311 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1312 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1313 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1314 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1315 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1316 } 1317 } 1318 /* coarse basis functions */ 1319 for (i=0;i<pcbddc->local_primal_size;i++) { 1320 PetscScalar *y; 1321 1322 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1323 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1324 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1325 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1326 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1327 if (i<n_vertices) { 1328 y[n_B*i+idx_V_B[i]] = 1.0; 1329 } 1330 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1331 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1332 1333 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1334 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1335 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1336 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1337 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1338 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1339 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1340 } 1341 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1342 } 1343 } 1344 /* free memory */ 1345 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1346 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1347 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1348 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1349 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1350 ierr = PetscFree(work);CHKERRQ(ierr); 1351 if (n_vertices) { 1352 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1353 } 1354 if (n_constraints) { 1355 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1356 } 1357 /* Checking coarse_sub_mat and coarse basis functios */ 1358 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1359 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1360 if (pcbddc->dbg_flag) { 1361 Mat coarse_sub_mat; 1362 Mat AUXMAT,TM1,TM2,TM3,TM4; 1363 Mat coarse_phi_D,coarse_phi_B; 1364 Mat coarse_psi_D,coarse_psi_B; 1365 Mat A_II,A_BB,A_IB,A_BI; 1366 Mat C_B,CPHI; 1367 IS is_dummy; 1368 Vec mones; 1369 MatType checkmattype=MATSEQAIJ; 1370 PetscReal real_value; 1371 1372 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1373 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1374 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1375 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1376 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1377 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1378 if (unsymmetric_check) { 1379 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1380 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1381 } 1382 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1383 1384 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1385 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1386 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1387 if (unsymmetric_check) { 1388 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1389 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1390 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1391 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1392 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1393 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1394 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1395 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1396 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1397 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1398 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1399 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1400 } else { 1401 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1402 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1403 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1404 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1405 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1406 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1407 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1408 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1409 } 1410 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1411 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1412 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1413 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1414 if (n_benign) { 1415 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1416 PetscScalar *data,*data2; 1417 1418 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1419 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1420 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1421 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1422 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1423 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1424 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1425 for (i=0;i<pcbddc->local_primal_size;i++) { 1426 data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i]; 1427 data[(i+1)*pcbddc->local_primal_size-1] += data2[i]; 1428 } 1429 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1430 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1431 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1432 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1433 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1434 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1435 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1436 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1437 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1438 } 1439 #if 0 1440 { 1441 PetscViewer viewer; 1442 char filename[256]; 1443 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1444 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1445 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1446 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1447 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1448 } 1449 #endif 1450 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1451 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1452 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1454 1455 /* check constraints */ 1456 if (!n_benign) { 1457 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1458 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1459 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1460 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1461 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1462 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1463 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1465 if (unsymmetric_check) { 1466 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1467 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1468 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1469 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1470 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1471 } 1472 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1473 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1474 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1475 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1476 } 1477 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1478 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1479 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1480 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1481 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1482 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1483 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1484 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1485 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1486 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1487 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1488 if (unsymmetric_check) { 1489 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1490 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1491 } 1492 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1493 } 1494 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1495 /* get back data */ 1496 *coarse_submat_vals_n = coarse_submat_vals; 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1502 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1503 { 1504 Mat *work_mat; 1505 IS isrow_s,iscol_s; 1506 PetscBool rsorted,csorted; 1507 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1508 PetscErrorCode ierr; 1509 1510 PetscFunctionBegin; 1511 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1512 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1513 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1514 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1515 1516 if (!rsorted) { 1517 const PetscInt *idxs; 1518 PetscInt *idxs_sorted,i; 1519 1520 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1521 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1522 for (i=0;i<rsize;i++) { 1523 idxs_perm_r[i] = i; 1524 } 1525 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1526 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1527 for (i=0;i<rsize;i++) { 1528 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1529 } 1530 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1531 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1532 } else { 1533 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1534 isrow_s = isrow; 1535 } 1536 1537 if (!csorted) { 1538 if (isrow == iscol) { 1539 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1540 iscol_s = isrow_s; 1541 } else { 1542 const PetscInt *idxs; 1543 PetscInt *idxs_sorted,i; 1544 1545 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1546 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1547 for (i=0;i<csize;i++) { 1548 idxs_perm_c[i] = i; 1549 } 1550 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1551 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1552 for (i=0;i<csize;i++) { 1553 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1554 } 1555 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1556 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1557 } 1558 } else { 1559 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1560 iscol_s = iscol; 1561 } 1562 1563 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1564 1565 if (!rsorted || !csorted) { 1566 Mat new_mat; 1567 IS is_perm_r,is_perm_c; 1568 1569 if (!rsorted) { 1570 PetscInt *idxs_r,i; 1571 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1572 for (i=0;i<rsize;i++) { 1573 idxs_r[idxs_perm_r[i]] = i; 1574 } 1575 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1576 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1577 } else { 1578 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1579 } 1580 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1581 1582 if (!csorted) { 1583 if (isrow_s == iscol_s) { 1584 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1585 is_perm_c = is_perm_r; 1586 } else { 1587 PetscInt *idxs_c,i; 1588 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1589 for (i=0;i<csize;i++) { 1590 idxs_c[idxs_perm_c[i]] = i; 1591 } 1592 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1593 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1594 } 1595 } else { 1596 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1597 } 1598 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1599 1600 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1601 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1602 work_mat[0] = new_mat; 1603 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1604 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1605 } 1606 1607 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1608 *B = work_mat[0]; 1609 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1610 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1611 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1612 PetscFunctionReturn(0); 1613 } 1614 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1617 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1618 { 1619 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1620 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1621 Mat new_mat; 1622 IS is_local,is_global; 1623 PetscInt local_size; 1624 PetscBool isseqaij; 1625 PetscErrorCode ierr; 1626 1627 PetscFunctionBegin; 1628 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1629 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1630 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1631 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1632 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1633 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1634 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1635 1636 /* check */ 1637 if (pcbddc->dbg_flag) { 1638 Vec x,x_change; 1639 PetscReal error; 1640 1641 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1642 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1643 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1644 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1647 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1648 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1649 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1650 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1651 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1652 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1653 ierr = VecDestroy(&x);CHKERRQ(ierr); 1654 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1655 } 1656 1657 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1658 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1659 if (isseqaij) { 1660 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1661 } else { 1662 Mat work_mat; 1663 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1664 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1665 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1666 } 1667 if (matis->A->symmetric_set) { 1668 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1669 #if !defined(PETSC_USE_COMPLEX) 1670 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1671 #endif 1672 } 1673 /* 1674 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1675 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1676 */ 1677 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1683 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1684 { 1685 PC_IS* pcis = (PC_IS*)(pc->data); 1686 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1687 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1688 PetscInt *idx_R_local=NULL; 1689 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1690 PetscInt vbs,bs; 1691 PetscBT bitmask=NULL; 1692 PetscErrorCode ierr; 1693 1694 PetscFunctionBegin; 1695 /* 1696 No need to setup local scatters if 1697 - primal space is unchanged 1698 AND 1699 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1700 AND 1701 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1702 */ 1703 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1704 PetscFunctionReturn(0); 1705 } 1706 /* destroy old objects */ 1707 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1708 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1709 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1710 /* Set Non-overlapping dimensions */ 1711 n_B = pcis->n_B; 1712 n_D = pcis->n - n_B; 1713 n_vertices = pcbddc->n_vertices; 1714 1715 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1716 1717 /* create auxiliary bitmask and allocate workspace */ 1718 if (!sub_schurs->reuse_mumps) { 1719 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1720 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1721 for (i=0;i<n_vertices;i++) { 1722 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1723 } 1724 1725 for (i=0, n_R=0; i<pcis->n; i++) { 1726 if (!PetscBTLookup(bitmask,i)) { 1727 idx_R_local[n_R++] = i; 1728 } 1729 } 1730 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1731 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1732 1733 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1734 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1735 } 1736 1737 /* Block code */ 1738 vbs = 1; 1739 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1740 if (bs>1 && !(n_vertices%bs)) { 1741 PetscBool is_blocked = PETSC_TRUE; 1742 PetscInt *vary; 1743 if (!sub_schurs->reuse_mumps) { 1744 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1745 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1746 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1747 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1748 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1749 for (i=0; i<pcis->n/bs; i++) { 1750 if (vary[i]!=0 && vary[i]!=bs) { 1751 is_blocked = PETSC_FALSE; 1752 break; 1753 } 1754 } 1755 ierr = PetscFree(vary);CHKERRQ(ierr); 1756 } else { 1757 /* Verify directly the R set */ 1758 for (i=0; i<n_R/bs; i++) { 1759 PetscInt j,node=idx_R_local[bs*i]; 1760 for (j=1; j<bs; j++) { 1761 if (node != idx_R_local[bs*i+j]-j) { 1762 is_blocked = PETSC_FALSE; 1763 break; 1764 } 1765 } 1766 } 1767 } 1768 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1769 vbs = bs; 1770 for (i=0;i<n_R/vbs;i++) { 1771 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1772 } 1773 } 1774 } 1775 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1776 if (sub_schurs->reuse_mumps) { 1777 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1778 1779 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1780 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1781 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1782 reuse_mumps->is_R = pcbddc->is_R_local; 1783 } else { 1784 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1785 } 1786 1787 /* print some info if requested */ 1788 if (pcbddc->dbg_flag) { 1789 PetscInt benign = 0; 1790 1791 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1792 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1793 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1794 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1796 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1797 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1798 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1799 } 1800 1801 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1802 if (!sub_schurs->reuse_mumps) { 1803 IS is_aux1,is_aux2; 1804 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1805 1806 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1807 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1808 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1809 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1810 for (i=0; i<n_D; i++) { 1811 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1812 } 1813 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1814 for (i=0, j=0; i<n_R; i++) { 1815 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1816 aux_array1[j++] = i; 1817 } 1818 } 1819 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1820 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1821 for (i=0, j=0; i<n_B; i++) { 1822 if (!PetscBTLookup(bitmask,is_indices[i])) { 1823 aux_array2[j++] = i; 1824 } 1825 } 1826 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1827 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1828 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1829 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1830 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1831 1832 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1833 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1834 for (i=0, j=0; i<n_R; i++) { 1835 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1836 aux_array1[j++] = i; 1837 } 1838 } 1839 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1840 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1841 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1842 } 1843 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1844 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1845 } else { 1846 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1847 IS tis; 1848 PetscInt schur_size; 1849 1850 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1851 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1852 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1853 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1854 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1855 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1856 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1857 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1858 } 1859 } 1860 PetscFunctionReturn(0); 1861 } 1862 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1866 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1867 { 1868 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1869 PC_IS *pcis = (PC_IS*)pc->data; 1870 PC pc_temp; 1871 Mat A_RR; 1872 MatReuse reuse; 1873 PetscScalar m_one = -1.0; 1874 PetscReal value; 1875 PetscInt n_D,n_R; 1876 PetscBool use_exact,use_exact_reduced,issbaij; 1877 PetscErrorCode ierr; 1878 /* prefixes stuff */ 1879 char dir_prefix[256],neu_prefix[256],str_level[16]; 1880 size_t len; 1881 1882 PetscFunctionBegin; 1883 1884 /* compute prefixes */ 1885 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1886 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1887 if (!pcbddc->current_level) { 1888 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1889 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1890 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1891 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1892 } else { 1893 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1894 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1895 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1896 len -= 15; /* remove "pc_bddc_coarse_" */ 1897 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1898 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1899 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1900 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1901 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1902 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1903 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1904 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1905 } 1906 1907 /* DIRICHLET PROBLEM */ 1908 if (dirichlet) { 1909 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1910 if (pcbddc->local_mat->symmetric_set) { 1911 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1912 } 1913 /* Matrix for Dirichlet problem is pcis->A_II */ 1914 n_D = pcis->n - pcis->n_B; 1915 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1916 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1917 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1918 /* default */ 1919 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1920 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1921 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1922 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1923 if (issbaij) { 1924 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1925 } else { 1926 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1927 } 1928 /* Allow user's customization */ 1929 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1930 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1931 } 1932 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1933 if (sub_schurs->reuse_mumps) { 1934 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1935 1936 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1937 } 1938 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1939 if (!n_D) { 1940 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1941 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1942 } 1943 /* Set Up KSP for Dirichlet problem of BDDC */ 1944 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1945 /* set ksp_D into pcis data */ 1946 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1947 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1948 pcis->ksp_D = pcbddc->ksp_D; 1949 } 1950 1951 /* NEUMANN PROBLEM */ 1952 A_RR = 0; 1953 if (neumann) { 1954 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1955 PetscInt ibs,mbs; 1956 PetscBool issbaij; 1957 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1958 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1959 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1960 if (pcbddc->ksp_R) { /* already created ksp */ 1961 PetscInt nn_R; 1962 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1963 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1964 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1965 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1966 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1967 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1968 reuse = MAT_INITIAL_MATRIX; 1969 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1970 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1971 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1972 reuse = MAT_INITIAL_MATRIX; 1973 } else { /* safe to reuse the matrix */ 1974 reuse = MAT_REUSE_MATRIX; 1975 } 1976 } 1977 /* last check */ 1978 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1979 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1980 reuse = MAT_INITIAL_MATRIX; 1981 } 1982 } else { /* first time, so we need to create the matrix */ 1983 reuse = MAT_INITIAL_MATRIX; 1984 } 1985 /* extract A_RR */ 1986 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1987 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1988 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1989 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1990 if (matis->A == pcbddc->local_mat) { 1991 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1992 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1993 } else { 1994 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1995 } 1996 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1997 if (matis->A == pcbddc->local_mat) { 1998 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1999 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2000 } else { 2001 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2002 } 2003 } 2004 if (!sub_schurs->reuse_mumps) { 2005 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2006 if (pcbddc->local_mat->symmetric_set) { 2007 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2008 } 2009 } else { 2010 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2011 2012 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2013 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2014 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2015 } 2016 if (!pcbddc->ksp_R) { /* create object if not present */ 2017 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2018 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2019 /* default */ 2020 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2021 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2022 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2023 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2024 if (issbaij) { 2025 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2026 } else { 2027 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2028 } 2029 /* Allow user's customization */ 2030 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2031 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2032 } 2033 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2034 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2035 if (!n_R) { 2036 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2037 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2038 } 2039 /* Reuse MUMPS solver if it is present */ 2040 if (sub_schurs->reuse_mumps) { 2041 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2042 2043 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2044 } 2045 /* Set Up KSP for Neumann problem of BDDC */ 2046 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2047 } 2048 /* free Neumann problem's matrix */ 2049 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2050 2051 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2052 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2053 if (pcbddc->dbg_flag) { 2054 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2055 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2057 } 2058 if (dirichlet) { /* Dirichlet */ 2059 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2060 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2061 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2062 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2063 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2064 /* need to be adapted? */ 2065 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2066 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2067 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2068 /* print info */ 2069 if (pcbddc->dbg_flag) { 2070 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2071 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2072 } 2073 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2074 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2075 } 2076 } 2077 if (neumann) { /* Neumann */ 2078 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2079 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2080 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2081 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2082 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2083 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2084 /* need to be adapted? */ 2085 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2086 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2087 /* print info */ 2088 if (pcbddc->dbg_flag) { 2089 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2090 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2091 } 2092 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2093 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2094 } 2095 } 2096 } 2097 PetscFunctionReturn(0); 2098 } 2099 2100 #undef __FUNCT__ 2101 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2102 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2103 { 2104 PetscErrorCode ierr; 2105 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2106 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2107 2108 PetscFunctionBegin; 2109 if (!sub_schurs->reuse_mumps) { 2110 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2111 } 2112 if (!pcbddc->switch_static) { 2113 if (applytranspose && pcbddc->local_auxmat1) { 2114 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2115 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2116 } 2117 if (!sub_schurs->reuse_mumps) { 2118 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2119 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2120 } else { 2121 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2122 2123 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2124 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2125 } 2126 } else { 2127 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2128 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2129 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2130 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2131 if (applytranspose && pcbddc->local_auxmat1) { 2132 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2133 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2134 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2135 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2136 } 2137 } 2138 if (!sub_schurs->reuse_mumps) { 2139 if (applytranspose) { 2140 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2141 } else { 2142 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2143 } 2144 #if defined(PETSC_HAVE_MUMPS) 2145 } else { 2146 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2147 2148 if (applytranspose) { 2149 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2150 } else { 2151 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2152 } 2153 #endif 2154 } 2155 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2156 if (!pcbddc->switch_static) { 2157 if (!sub_schurs->reuse_mumps) { 2158 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2159 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2160 } else { 2161 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2162 2163 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2164 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2165 } 2166 if (!applytranspose && pcbddc->local_auxmat1) { 2167 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2168 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2169 } 2170 } else { 2171 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2172 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2173 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2174 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2175 if (!applytranspose && pcbddc->local_auxmat1) { 2176 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2177 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2178 } 2179 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2180 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2181 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2182 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 PetscFunctionReturn(0); 2185 } 2186 2187 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2190 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2191 { 2192 PetscErrorCode ierr; 2193 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2194 PC_IS* pcis = (PC_IS*) (pc->data); 2195 const PetscScalar zero = 0.0; 2196 2197 PetscFunctionBegin; 2198 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2199 if (applytranspose) { 2200 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2201 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2202 } else { 2203 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2204 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2205 } 2206 2207 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2208 if (pcbddc->benign_p0_lidx >= 0) { 2209 PetscScalar *array; 2210 2211 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2212 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2213 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2214 } 2215 2216 /* start communications from local primal nodes to rhs of coarse solver */ 2217 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2218 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2219 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2220 2221 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2222 /* TODO remove null space when doing multilevel */ 2223 if (pcbddc->coarse_ksp) { 2224 Vec rhs,sol; 2225 2226 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2227 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2228 if (applytranspose) { 2229 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2230 } else { 2231 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2232 } 2233 } 2234 2235 /* Local solution on R nodes */ 2236 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2237 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2238 } 2239 2240 /* communications from coarse sol to local primal nodes */ 2241 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2242 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2243 2244 /* Sum contributions from two levels */ 2245 if (applytranspose) { 2246 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2247 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2248 } else { 2249 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2250 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2251 } 2252 /* store p0 */ 2253 if (pcbddc->benign_p0_lidx >= 0) { 2254 PetscScalar *array; 2255 2256 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2257 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2258 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 #undef __FUNCT__ 2264 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2265 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2266 { 2267 PetscErrorCode ierr; 2268 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2269 PetscScalar *array; 2270 Vec from,to; 2271 2272 PetscFunctionBegin; 2273 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2274 from = pcbddc->coarse_vec; 2275 to = pcbddc->vec1_P; 2276 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2277 Vec tvec; 2278 2279 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2280 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2281 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2282 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2283 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2284 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2285 } 2286 } else { /* from local to global -> put data in coarse right hand side */ 2287 from = pcbddc->vec1_P; 2288 to = pcbddc->coarse_vec; 2289 } 2290 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2296 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2297 { 2298 PetscErrorCode ierr; 2299 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2300 PetscScalar *array; 2301 Vec from,to; 2302 2303 PetscFunctionBegin; 2304 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2305 from = pcbddc->coarse_vec; 2306 to = pcbddc->vec1_P; 2307 } else { /* from local to global -> put data in coarse right hand side */ 2308 from = pcbddc->vec1_P; 2309 to = pcbddc->coarse_vec; 2310 } 2311 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2312 if (smode == SCATTER_FORWARD) { 2313 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2314 Vec tvec; 2315 2316 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2317 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2318 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2319 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2320 } 2321 } else { 2322 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2323 ierr = VecResetArray(from);CHKERRQ(ierr); 2324 } 2325 } 2326 PetscFunctionReturn(0); 2327 } 2328 2329 /* uncomment for testing purposes */ 2330 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2331 #undef __FUNCT__ 2332 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2333 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2334 { 2335 PetscErrorCode ierr; 2336 PC_IS* pcis = (PC_IS*)(pc->data); 2337 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2338 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2339 /* one and zero */ 2340 PetscScalar one=1.0,zero=0.0; 2341 /* space to store constraints and their local indices */ 2342 PetscScalar *constraints_data; 2343 PetscInt *constraints_idxs,*constraints_idxs_B; 2344 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2345 PetscInt *constraints_n; 2346 /* iterators */ 2347 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2348 /* BLAS integers */ 2349 PetscBLASInt lwork,lierr; 2350 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2351 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2352 /* reuse */ 2353 PetscInt olocal_primal_size,olocal_primal_size_cc; 2354 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2355 /* change of basis */ 2356 PetscBool qr_needed; 2357 PetscBT change_basis,qr_needed_idx; 2358 /* auxiliary stuff */ 2359 PetscInt *nnz,*is_indices; 2360 PetscInt ncc; 2361 /* some quantities */ 2362 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2363 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2364 2365 PetscFunctionBegin; 2366 /* Destroy Mat objects computed previously */ 2367 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2368 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2369 /* save info on constraints from previous setup (if any) */ 2370 olocal_primal_size = pcbddc->local_primal_size; 2371 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2372 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2373 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2374 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2375 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2376 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2377 2378 /* print some info */ 2379 if (pcbddc->dbg_flag) { 2380 IS vertices; 2381 PetscInt nv,nedges,nfaces; 2382 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2383 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2384 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2385 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2386 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2388 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2389 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2390 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2391 } 2392 2393 if (!pcbddc->adaptive_selection) { 2394 IS ISForVertices,*ISForFaces,*ISForEdges; 2395 MatNullSpace nearnullsp; 2396 const Vec *nearnullvecs; 2397 Vec *localnearnullsp; 2398 PetscScalar *array; 2399 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2400 PetscBool nnsp_has_cnst; 2401 /* LAPACK working arrays for SVD or POD */ 2402 PetscBool skip_lapack,boolforchange; 2403 PetscScalar *work; 2404 PetscReal *singular_vals; 2405 #if defined(PETSC_USE_COMPLEX) 2406 PetscReal *rwork; 2407 #endif 2408 #if defined(PETSC_MISSING_LAPACK_GESVD) 2409 PetscScalar *temp_basis,*correlation_mat; 2410 #else 2411 PetscBLASInt dummy_int=1; 2412 PetscScalar dummy_scalar=1.; 2413 #endif 2414 2415 /* Get index sets for faces, edges and vertices from graph */ 2416 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2417 /* free unneeded index sets */ 2418 if (!pcbddc->use_vertices) { 2419 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2420 } 2421 if (!pcbddc->use_edges) { 2422 for (i=0;i<n_ISForEdges;i++) { 2423 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2424 } 2425 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2426 n_ISForEdges = 0; 2427 } 2428 if (!pcbddc->use_faces) { 2429 for (i=0;i<n_ISForFaces;i++) { 2430 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2431 } 2432 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2433 n_ISForFaces = 0; 2434 } 2435 2436 #if defined(PETSC_USE_DEBUG) 2437 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2438 Also use_change_of_basis should be consistent among processors */ 2439 if (pcbddc->NullSpace) { 2440 PetscBool tbool[2],gbool[2]; 2441 2442 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2443 pcbddc->use_change_of_basis = PETSC_TRUE; 2444 if (!ISForEdges) { 2445 pcbddc->use_change_on_faces = PETSC_TRUE; 2446 } 2447 } 2448 tbool[0] = pcbddc->use_change_of_basis; 2449 tbool[1] = pcbddc->use_change_on_faces; 2450 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2451 pcbddc->use_change_of_basis = gbool[0]; 2452 pcbddc->use_change_on_faces = gbool[1]; 2453 } 2454 #endif 2455 2456 /* check if near null space is attached to global mat */ 2457 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2458 if (nearnullsp) { 2459 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2460 /* remove any stored info */ 2461 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2462 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2463 /* store information for BDDC solver reuse */ 2464 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2465 pcbddc->onearnullspace = nearnullsp; 2466 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2467 for (i=0;i<nnsp_size;i++) { 2468 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2469 } 2470 } else { /* if near null space is not provided BDDC uses constants by default */ 2471 nnsp_size = 0; 2472 nnsp_has_cnst = PETSC_TRUE; 2473 } 2474 /* get max number of constraints on a single cc */ 2475 max_constraints = nnsp_size; 2476 if (nnsp_has_cnst) max_constraints++; 2477 2478 /* 2479 Evaluate maximum storage size needed by the procedure 2480 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2481 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2482 There can be multiple constraints per connected component 2483 */ 2484 n_vertices = 0; 2485 if (ISForVertices) { 2486 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2487 } 2488 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2489 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2490 2491 total_counts = n_ISForFaces+n_ISForEdges; 2492 total_counts *= max_constraints; 2493 total_counts += n_vertices; 2494 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2495 2496 total_counts = 0; 2497 max_size_of_constraint = 0; 2498 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2499 IS used_is; 2500 if (i<n_ISForEdges) { 2501 used_is = ISForEdges[i]; 2502 } else { 2503 used_is = ISForFaces[i-n_ISForEdges]; 2504 } 2505 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2506 total_counts += j; 2507 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2508 } 2509 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2510 2511 /* get local part of global near null space vectors */ 2512 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2513 for (k=0;k<nnsp_size;k++) { 2514 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2515 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2516 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2517 } 2518 2519 /* whether or not to skip lapack calls */ 2520 skip_lapack = PETSC_TRUE; 2521 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2522 2523 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2524 if (!skip_lapack) { 2525 PetscScalar temp_work; 2526 2527 #if defined(PETSC_MISSING_LAPACK_GESVD) 2528 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2529 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2530 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2531 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2532 #if defined(PETSC_USE_COMPLEX) 2533 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2534 #endif 2535 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2536 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2538 lwork = -1; 2539 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2540 #if !defined(PETSC_USE_COMPLEX) 2541 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2542 #else 2543 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2544 #endif 2545 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2546 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2547 #else /* on missing GESVD */ 2548 /* SVD */ 2549 PetscInt max_n,min_n; 2550 max_n = max_size_of_constraint; 2551 min_n = max_constraints; 2552 if (max_size_of_constraint < max_constraints) { 2553 min_n = max_size_of_constraint; 2554 max_n = max_constraints; 2555 } 2556 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2557 #if defined(PETSC_USE_COMPLEX) 2558 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2559 #endif 2560 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2561 lwork = -1; 2562 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2563 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2564 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2565 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2566 #if !defined(PETSC_USE_COMPLEX) 2567 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2568 #else 2569 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2570 #endif 2571 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2572 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2573 #endif /* on missing GESVD */ 2574 /* Allocate optimal workspace */ 2575 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2576 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2577 } 2578 /* Now we can loop on constraining sets */ 2579 total_counts = 0; 2580 constraints_idxs_ptr[0] = 0; 2581 constraints_data_ptr[0] = 0; 2582 /* vertices */ 2583 if (n_vertices) { 2584 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2585 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2586 for (i=0;i<n_vertices;i++) { 2587 constraints_n[total_counts] = 1; 2588 constraints_data[total_counts] = 1.0; 2589 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2590 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2591 total_counts++; 2592 } 2593 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2594 n_vertices = total_counts; 2595 } 2596 2597 /* edges and faces */ 2598 total_counts_cc = total_counts; 2599 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2600 IS used_is; 2601 PetscBool idxs_copied = PETSC_FALSE; 2602 2603 if (ncc<n_ISForEdges) { 2604 used_is = ISForEdges[ncc]; 2605 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2606 } else { 2607 used_is = ISForFaces[ncc-n_ISForEdges]; 2608 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2609 } 2610 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2611 2612 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2613 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2614 /* change of basis should not be performed on local periodic nodes */ 2615 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2616 if (nnsp_has_cnst) { 2617 PetscScalar quad_value; 2618 2619 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2620 idxs_copied = PETSC_TRUE; 2621 2622 if (!pcbddc->use_nnsp_true) { 2623 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2624 } else { 2625 quad_value = 1.0; 2626 } 2627 for (j=0;j<size_of_constraint;j++) { 2628 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2629 } 2630 temp_constraints++; 2631 total_counts++; 2632 } 2633 for (k=0;k<nnsp_size;k++) { 2634 PetscReal real_value; 2635 PetscScalar *ptr_to_data; 2636 2637 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2638 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2639 for (j=0;j<size_of_constraint;j++) { 2640 ptr_to_data[j] = array[is_indices[j]]; 2641 } 2642 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2643 /* check if array is null on the connected component */ 2644 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2645 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2646 if (real_value > 0.0) { /* keep indices and values */ 2647 temp_constraints++; 2648 total_counts++; 2649 if (!idxs_copied) { 2650 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2651 idxs_copied = PETSC_TRUE; 2652 } 2653 } 2654 } 2655 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2656 valid_constraints = temp_constraints; 2657 if (!pcbddc->use_nnsp_true && temp_constraints) { 2658 if (temp_constraints == 1) { /* just normalize the constraint */ 2659 PetscScalar norm,*ptr_to_data; 2660 2661 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2662 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2663 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2664 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2665 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2666 } else { /* perform SVD */ 2667 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2668 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2669 2670 #if defined(PETSC_MISSING_LAPACK_GESVD) 2671 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2672 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2673 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2674 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2675 from that computed using LAPACKgesvd 2676 -> This is due to a different computation of eigenvectors in LAPACKheev 2677 -> The quality of the POD-computed basis will be the same */ 2678 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2679 /* Store upper triangular part of correlation matrix */ 2680 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2681 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2682 for (j=0;j<temp_constraints;j++) { 2683 for (k=0;k<j+1;k++) { 2684 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2685 } 2686 } 2687 /* compute eigenvalues and eigenvectors of correlation matrix */ 2688 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2689 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2690 #if !defined(PETSC_USE_COMPLEX) 2691 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2692 #else 2693 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2694 #endif 2695 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2696 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2697 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2698 j = 0; 2699 while (j < temp_constraints && singular_vals[j] < tol) j++; 2700 total_counts = total_counts-j; 2701 valid_constraints = temp_constraints-j; 2702 /* scale and copy POD basis into used quadrature memory */ 2703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2704 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2705 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2706 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2707 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2709 if (j<temp_constraints) { 2710 PetscInt ii; 2711 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2712 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2713 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2714 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2715 for (k=0;k<temp_constraints-j;k++) { 2716 for (ii=0;ii<size_of_constraint;ii++) { 2717 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2718 } 2719 } 2720 } 2721 #else /* on missing GESVD */ 2722 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2725 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2726 #if !defined(PETSC_USE_COMPLEX) 2727 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2728 #else 2729 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2730 #endif 2731 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2732 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2733 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2734 k = temp_constraints; 2735 if (k > size_of_constraint) k = size_of_constraint; 2736 j = 0; 2737 while (j < k && singular_vals[k-j-1] < tol) j++; 2738 valid_constraints = k-j; 2739 total_counts = total_counts-temp_constraints+valid_constraints; 2740 #endif /* on missing GESVD */ 2741 } 2742 } 2743 /* update pointers information */ 2744 if (valid_constraints) { 2745 constraints_n[total_counts_cc] = valid_constraints; 2746 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2747 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2748 /* set change_of_basis flag */ 2749 if (boolforchange) { 2750 PetscBTSet(change_basis,total_counts_cc); 2751 } 2752 total_counts_cc++; 2753 } 2754 } 2755 /* free workspace */ 2756 if (!skip_lapack) { 2757 ierr = PetscFree(work);CHKERRQ(ierr); 2758 #if defined(PETSC_USE_COMPLEX) 2759 ierr = PetscFree(rwork);CHKERRQ(ierr); 2760 #endif 2761 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2762 #if defined(PETSC_MISSING_LAPACK_GESVD) 2763 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2764 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2765 #endif 2766 } 2767 for (k=0;k<nnsp_size;k++) { 2768 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2769 } 2770 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2771 /* free index sets of faces, edges and vertices */ 2772 for (i=0;i<n_ISForFaces;i++) { 2773 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2774 } 2775 if (n_ISForFaces) { 2776 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2777 } 2778 for (i=0;i<n_ISForEdges;i++) { 2779 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2780 } 2781 if (n_ISForEdges) { 2782 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2783 } 2784 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2785 } else { 2786 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2787 2788 total_counts = 0; 2789 n_vertices = 0; 2790 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2791 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2792 } 2793 max_constraints = 0; 2794 total_counts_cc = 0; 2795 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2796 total_counts += pcbddc->adaptive_constraints_n[i]; 2797 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2798 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2799 } 2800 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2801 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2802 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2803 constraints_data = pcbddc->adaptive_constraints_data; 2804 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2805 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2806 total_counts_cc = 0; 2807 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2808 if (pcbddc->adaptive_constraints_n[i]) { 2809 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2810 } 2811 } 2812 #if 0 2813 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2814 for (i=0;i<total_counts_cc;i++) { 2815 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2816 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2817 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2818 printf(" %d",constraints_idxs[j]); 2819 } 2820 printf("\n"); 2821 printf("number of cc: %d\n",constraints_n[i]); 2822 } 2823 for (i=0;i<n_vertices;i++) { 2824 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2825 } 2826 for (i=0;i<sub_schurs->n_subs;i++) { 2827 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2828 } 2829 #endif 2830 2831 max_size_of_constraint = 0; 2832 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2833 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2834 /* Change of basis */ 2835 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2836 if (pcbddc->use_change_of_basis) { 2837 for (i=0;i<sub_schurs->n_subs;i++) { 2838 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2839 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2840 } 2841 } 2842 } 2843 } 2844 pcbddc->local_primal_size = total_counts; 2845 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2846 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2847 2848 /* map constraints_idxs in boundary numbering */ 2849 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2850 if (i != constraints_idxs_ptr[total_counts_cc]) { 2851 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2852 } 2853 2854 /* Create constraint matrix */ 2855 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2856 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2857 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2858 2859 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2860 /* determine if a QR strategy is needed for change of basis */ 2861 qr_needed = PETSC_FALSE; 2862 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2863 total_primal_vertices=0; 2864 pcbddc->local_primal_size_cc = 0; 2865 for (i=0;i<total_counts_cc;i++) { 2866 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2867 if (size_of_constraint == 1) { 2868 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2869 pcbddc->local_primal_size_cc += 1; 2870 } else if (PetscBTLookup(change_basis,i)) { 2871 for (k=0;k<constraints_n[i];k++) { 2872 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2873 } 2874 pcbddc->local_primal_size_cc += constraints_n[i]; 2875 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2876 PetscBTSet(qr_needed_idx,i); 2877 qr_needed = PETSC_TRUE; 2878 } 2879 } else { 2880 pcbddc->local_primal_size_cc += 1; 2881 } 2882 } 2883 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2884 pcbddc->n_vertices = total_primal_vertices; 2885 /* permute indices in order to have a sorted set of vertices */ 2886 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2887 2888 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2889 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2890 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2891 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2892 2893 /* nonzero structure of constraint matrix */ 2894 /* and get reference dof for local constraints */ 2895 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2896 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2897 2898 j = total_primal_vertices; 2899 total_counts = total_primal_vertices; 2900 cum = total_primal_vertices; 2901 for (i=n_vertices;i<total_counts_cc;i++) { 2902 if (!PetscBTLookup(change_basis,i)) { 2903 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2904 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2905 cum++; 2906 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2907 for (k=0;k<constraints_n[i];k++) { 2908 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2909 nnz[j+k] = size_of_constraint; 2910 } 2911 j += constraints_n[i]; 2912 } 2913 } 2914 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2915 ierr = PetscFree(nnz);CHKERRQ(ierr); 2916 2917 /* set values in constraint matrix */ 2918 for (i=0;i<total_primal_vertices;i++) { 2919 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2920 } 2921 total_counts = total_primal_vertices; 2922 for (i=n_vertices;i<total_counts_cc;i++) { 2923 if (!PetscBTLookup(change_basis,i)) { 2924 PetscInt *cols; 2925 2926 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2927 cols = constraints_idxs+constraints_idxs_ptr[i]; 2928 for (k=0;k<constraints_n[i];k++) { 2929 PetscInt row = total_counts+k; 2930 PetscScalar *vals; 2931 2932 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2933 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2934 } 2935 total_counts += constraints_n[i]; 2936 } 2937 } 2938 /* assembling */ 2939 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2940 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2941 2942 /* 2943 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2944 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2945 */ 2946 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2947 if (pcbddc->use_change_of_basis) { 2948 /* dual and primal dofs on a single cc */ 2949 PetscInt dual_dofs,primal_dofs; 2950 /* working stuff for GEQRF */ 2951 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2952 PetscBLASInt lqr_work; 2953 /* working stuff for UNGQR */ 2954 PetscScalar *gqr_work,lgqr_work_t; 2955 PetscBLASInt lgqr_work; 2956 /* working stuff for TRTRS */ 2957 PetscScalar *trs_rhs; 2958 PetscBLASInt Blas_NRHS; 2959 /* pointers for values insertion into change of basis matrix */ 2960 PetscInt *start_rows,*start_cols; 2961 PetscScalar *start_vals; 2962 /* working stuff for values insertion */ 2963 PetscBT is_primal; 2964 PetscInt *aux_primal_numbering_B; 2965 /* matrix sizes */ 2966 PetscInt global_size,local_size; 2967 /* temporary change of basis */ 2968 Mat localChangeOfBasisMatrix; 2969 /* extra space for debugging */ 2970 PetscScalar *dbg_work; 2971 2972 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2973 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2974 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2975 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2976 /* nonzeros for local mat */ 2977 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2978 for (i=0;i<pcis->n;i++) nnz[i]=1; 2979 for (i=n_vertices;i<total_counts_cc;i++) { 2980 if (PetscBTLookup(change_basis,i)) { 2981 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2982 if (PetscBTLookup(qr_needed_idx,i)) { 2983 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2984 } else { 2985 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2986 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2987 } 2988 } 2989 } 2990 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2991 ierr = PetscFree(nnz);CHKERRQ(ierr); 2992 /* Set initial identity in the matrix */ 2993 for (i=0;i<pcis->n;i++) { 2994 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2995 } 2996 2997 if (pcbddc->dbg_flag) { 2998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3000 } 3001 3002 3003 /* Now we loop on the constraints which need a change of basis */ 3004 /* 3005 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3006 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3007 3008 Basic blocks of change of basis matrix T computed by 3009 3010 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3011 3012 | 1 0 ... 0 s_1/S | 3013 | 0 1 ... 0 s_2/S | 3014 | ... | 3015 | 0 ... 1 s_{n-1}/S | 3016 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3017 3018 with S = \sum_{i=1}^n s_i^2 3019 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3020 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3021 3022 - QR decomposition of constraints otherwise 3023 */ 3024 if (qr_needed) { 3025 /* space to store Q */ 3026 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3027 /* first we issue queries for optimal work */ 3028 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3029 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3030 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3031 lqr_work = -1; 3032 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3033 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3034 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3035 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3036 lgqr_work = -1; 3037 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3038 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3039 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3040 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3041 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3042 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3043 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3044 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3045 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3046 /* array to store scaling factors for reflectors */ 3047 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3048 /* array to store rhs and solution of triangular solver */ 3049 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3050 /* allocating workspace for check */ 3051 if (pcbddc->dbg_flag) { 3052 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3053 } 3054 } 3055 /* array to store whether a node is primal or not */ 3056 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3057 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3058 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3059 if (i != total_primal_vertices) { 3060 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3061 } 3062 for (i=0;i<total_primal_vertices;i++) { 3063 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3064 } 3065 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3066 3067 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3068 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3069 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3070 if (PetscBTLookup(change_basis,total_counts)) { 3071 /* get constraint info */ 3072 primal_dofs = constraints_n[total_counts]; 3073 dual_dofs = size_of_constraint-primal_dofs; 3074 3075 if (pcbddc->dbg_flag) { 3076 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3077 } 3078 3079 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3080 3081 /* copy quadrature constraints for change of basis check */ 3082 if (pcbddc->dbg_flag) { 3083 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3084 } 3085 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3086 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3087 3088 /* compute QR decomposition of constraints */ 3089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3090 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3092 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3093 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3094 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3095 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3096 3097 /* explictly compute R^-T */ 3098 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3099 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3100 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3101 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3102 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3103 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3104 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3105 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3106 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3107 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3108 3109 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3110 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3111 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3112 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3113 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3114 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3115 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3116 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3117 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3118 3119 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3120 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3121 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3122 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3123 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3124 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3126 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3127 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3128 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3129 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3130 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3131 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3132 3133 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3134 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3135 /* insert cols for primal dofs */ 3136 for (j=0;j<primal_dofs;j++) { 3137 start_vals = &qr_basis[j*size_of_constraint]; 3138 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3139 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3140 } 3141 /* insert cols for dual dofs */ 3142 for (j=0,k=0;j<dual_dofs;k++) { 3143 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3144 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3145 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3146 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3147 j++; 3148 } 3149 } 3150 3151 /* check change of basis */ 3152 if (pcbddc->dbg_flag) { 3153 PetscInt ii,jj; 3154 PetscBool valid_qr=PETSC_TRUE; 3155 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3156 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3157 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3158 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3159 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3160 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3161 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3162 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3163 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3164 for (jj=0;jj<size_of_constraint;jj++) { 3165 for (ii=0;ii<primal_dofs;ii++) { 3166 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3167 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3168 } 3169 } 3170 if (!valid_qr) { 3171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3172 for (jj=0;jj<size_of_constraint;jj++) { 3173 for (ii=0;ii<primal_dofs;ii++) { 3174 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3175 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3176 } 3177 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3178 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3179 } 3180 } 3181 } 3182 } else { 3183 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3184 } 3185 } 3186 } else { /* simple transformation block */ 3187 PetscInt row,col; 3188 PetscScalar val,norm; 3189 3190 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3191 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3192 for (j=0;j<size_of_constraint;j++) { 3193 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3194 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3195 if (!PetscBTLookup(is_primal,row_B)) { 3196 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3197 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3198 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3199 } else { 3200 for (k=0;k<size_of_constraint;k++) { 3201 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3202 if (row != col) { 3203 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3204 } else { 3205 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3206 } 3207 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3208 } 3209 } 3210 } 3211 if (pcbddc->dbg_flag) { 3212 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3213 } 3214 } 3215 } else { 3216 if (pcbddc->dbg_flag) { 3217 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3218 } 3219 } 3220 } 3221 3222 /* free workspace */ 3223 if (qr_needed) { 3224 if (pcbddc->dbg_flag) { 3225 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3226 } 3227 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3228 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3229 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3230 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3231 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3232 } 3233 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3234 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3235 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3236 3237 /* assembling of global change of variable */ 3238 { 3239 Mat tmat; 3240 PetscInt bs; 3241 3242 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3243 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3244 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3245 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3246 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3247 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3248 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3249 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3250 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3251 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3252 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3253 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3254 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3255 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3256 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3257 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3258 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3259 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3260 } 3261 /* check */ 3262 if (pcbddc->dbg_flag) { 3263 PetscReal error; 3264 Vec x,x_change; 3265 3266 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3267 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3268 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3269 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3270 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3271 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3272 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3273 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3274 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3275 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3276 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3277 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3279 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3280 ierr = VecDestroy(&x);CHKERRQ(ierr); 3281 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3282 } 3283 3284 /* adapt sub_schurs computed (if any) */ 3285 if (pcbddc->use_deluxe_scaling) { 3286 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3287 if (sub_schurs->S_Ej_all) { 3288 Mat S_new,tmat; 3289 ISLocalToGlobalMapping NtoSall; 3290 IS is_all_N,is_V,is_V_Sall; 3291 const PetscScalar *array; 3292 const PetscInt *idxs_V,*idxs_all; 3293 PetscInt i,n_V; 3294 3295 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3296 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3297 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3298 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3299 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3300 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3301 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3302 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3303 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3304 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3305 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3306 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3307 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3308 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3309 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3310 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3311 for (i=0;i<n_V;i++) { 3312 PetscScalar val; 3313 PetscInt idx; 3314 3315 idx = idxs_V[i]; 3316 val = array[idxs_all[idxs_V[i]]]; 3317 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3318 } 3319 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3320 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3321 sub_schurs->S_Ej_all = S_new; 3322 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3323 if (sub_schurs->sum_S_Ej_all) { 3324 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3325 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3326 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3327 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3328 sub_schurs->sum_S_Ej_all = S_new; 3329 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3330 } 3331 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3332 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3333 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3334 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3335 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3336 } 3337 } 3338 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3339 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3340 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3341 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3342 } 3343 3344 /* set up change of basis context */ 3345 if (pcbddc->ChangeOfBasisMatrix) { 3346 PCBDDCChange_ctx change_ctx; 3347 3348 if (!pcbddc->new_global_mat) { 3349 PetscInt global_size,local_size; 3350 3351 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3352 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3353 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3354 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3355 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3356 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3357 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3358 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3359 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3360 } else { 3361 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3362 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3363 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3364 } 3365 if (!pcbddc->user_ChangeOfBasisMatrix) { 3366 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3367 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3368 } else { 3369 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3370 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3371 } 3372 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3373 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3374 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3375 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3376 } 3377 3378 /* add pressure dof to set of primal nodes for numbering purposes */ 3379 if (pcbddc->benign_p0_lidx >= 0) { 3380 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3381 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx; 3382 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3383 pcbddc->local_primal_size_cc++; 3384 pcbddc->local_primal_size++; 3385 } 3386 3387 /* check if a new primal space has been introduced (also take into account benign trick) */ 3388 pcbddc->new_primal_space_local = PETSC_TRUE; 3389 if (olocal_primal_size == pcbddc->local_primal_size) { 3390 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3391 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3392 if (!pcbddc->new_primal_space_local) { 3393 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3394 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3395 } 3396 } 3397 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3398 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3399 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3400 3401 /* flush dbg viewer */ 3402 if (pcbddc->dbg_flag) { 3403 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3404 } 3405 3406 /* free workspace */ 3407 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3408 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3409 if (!pcbddc->adaptive_selection) { 3410 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3411 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3412 } else { 3413 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3414 pcbddc->adaptive_constraints_idxs_ptr, 3415 pcbddc->adaptive_constraints_data_ptr, 3416 pcbddc->adaptive_constraints_idxs, 3417 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3418 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3419 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3420 } 3421 PetscFunctionReturn(0); 3422 } 3423 3424 #undef __FUNCT__ 3425 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3426 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3427 { 3428 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3429 PC_IS *pcis = (PC_IS*)pc->data; 3430 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3431 PetscInt ierr,i,vertex_size,N; 3432 PetscViewer viewer=pcbddc->dbg_viewer; 3433 3434 PetscFunctionBegin; 3435 /* Reset previously computed graph */ 3436 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3437 /* Init local Graph struct */ 3438 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3439 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3440 3441 /* Check validity of the csr graph passed in by the user */ 3442 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3443 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3444 } 3445 3446 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3447 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3448 PetscInt *xadj,*adjncy; 3449 PetscInt nvtxs; 3450 PetscBool flg_row=PETSC_FALSE; 3451 3452 if (pcbddc->use_local_adj) { 3453 3454 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3455 if (flg_row) { 3456 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3457 pcbddc->computed_rowadj = PETSC_TRUE; 3458 } 3459 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3460 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3461 IS is_dummy; 3462 ISLocalToGlobalMapping l2gmap_dummy; 3463 PetscInt j,sum; 3464 PetscInt *cxadj,*cadjncy; 3465 const PetscInt *idxs; 3466 PCBDDCGraph graph; 3467 PetscBT is_on_boundary; 3468 3469 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3470 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3471 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3472 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3473 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3474 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3475 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3476 if (flg_row) { 3477 graph->xadj = xadj; 3478 graph->adjncy = adjncy; 3479 } 3480 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3481 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3482 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3483 3484 if (pcbddc->dbg_flag) { 3485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3486 for (i=0;i<graph->ncc;i++) { 3487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3488 } 3489 } 3490 3491 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3492 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3493 for (i=0;i<pcis->n_B;i++) { 3494 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3495 } 3496 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3497 3498 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3499 sum = 0; 3500 for (i=0;i<graph->ncc;i++) { 3501 PetscInt sizecc = 0; 3502 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3503 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3504 sizecc++; 3505 } 3506 } 3507 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3508 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3509 cxadj[graph->queue[j]] = sizecc; 3510 } 3511 } 3512 sum += sizecc*sizecc; 3513 } 3514 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3515 sum = 0; 3516 for (i=0;i<pcis->n;i++) { 3517 PetscInt temp = cxadj[i]; 3518 cxadj[i] = sum; 3519 sum += temp; 3520 } 3521 cxadj[pcis->n] = sum; 3522 for (i=0;i<graph->ncc;i++) { 3523 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3524 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3525 PetscInt k,sizecc = 0; 3526 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3527 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3528 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3529 sizecc++; 3530 } 3531 } 3532 } 3533 } 3534 } 3535 if (sum) { 3536 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3537 } else { 3538 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3539 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3540 } 3541 graph->xadj = 0; 3542 graph->adjncy = 0; 3543 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3544 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3545 } 3546 } 3547 if (pcbddc->dbg_flag) { 3548 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3549 } 3550 3551 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3552 vertex_size = 1; 3553 if (pcbddc->user_provided_isfordofs) { 3554 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3555 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3556 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3557 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3558 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3559 } 3560 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3561 pcbddc->n_ISForDofs = 0; 3562 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3563 } 3564 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3565 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3566 } else { 3567 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3568 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3569 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3570 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3571 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3572 } 3573 } 3574 } 3575 3576 /* Setup of Graph */ 3577 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3578 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3579 } 3580 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3581 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3582 } 3583 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3584 3585 /* Graph's connected components analysis */ 3586 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3587 3588 /* print some info to stdout */ 3589 if (pcbddc->dbg_flag) { 3590 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3591 } 3592 3593 /* mark topography has done */ 3594 pcbddc->recompute_topography = PETSC_FALSE; 3595 PetscFunctionReturn(0); 3596 } 3597 3598 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3599 #undef __FUNCT__ 3600 #define __FUNCT__ "PCBDDCSubsetNumbering" 3601 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3602 { 3603 PetscSF sf; 3604 PetscLayout map; 3605 const PetscInt *idxs; 3606 PetscInt *leaf_data,*root_data,*gidxs; 3607 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3608 PetscInt n_n,nlocals,start,first_index; 3609 PetscMPIInt commsize; 3610 PetscBool first_found; 3611 PetscErrorCode ierr; 3612 3613 PetscFunctionBegin; 3614 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3615 if (subset_mult) { 3616 PetscCheckSameComm(subset,1,subset_mult,2); 3617 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3618 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3619 } 3620 /* create workspace layout for computing global indices of subset */ 3621 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3622 lbounds[0] = lbounds[1] = 0; 3623 for (i=0;i<n;i++) { 3624 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3625 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3626 } 3627 lbounds[0] = -lbounds[0]; 3628 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3629 gbounds[0] = -gbounds[0]; 3630 N = gbounds[1] - gbounds[0] + 1; 3631 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3632 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3633 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3634 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3635 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3636 3637 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3638 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3639 if (subset_mult) { 3640 const PetscInt* idxs_mult; 3641 3642 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3643 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3644 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3645 } else { 3646 for (i=0;i<n;i++) leaf_data[i] = 1; 3647 } 3648 /* local size of new subset */ 3649 n_n = 0; 3650 for (i=0;i<n;i++) n_n += leaf_data[i]; 3651 3652 /* global indexes in layout */ 3653 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3654 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3655 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3656 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3657 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3658 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3659 3660 /* reduce from leaves to roots */ 3661 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3662 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3663 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3664 3665 /* count indexes in local part of layout */ 3666 nlocals = 0; 3667 first_index = -1; 3668 first_found = PETSC_FALSE; 3669 for (i=0;i<Nl;i++) { 3670 if (!first_found && root_data[i]) { 3671 first_found = PETSC_TRUE; 3672 first_index = i; 3673 } 3674 nlocals += root_data[i]; 3675 } 3676 3677 /* cumulative of number of indexes and size of subset without holes */ 3678 #if defined(PETSC_HAVE_MPI_EXSCAN) 3679 start = 0; 3680 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3681 #else 3682 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3683 start = start-nlocals; 3684 #endif 3685 3686 if (N_n) { /* compute total size of new subset if requested */ 3687 *N_n = start + nlocals; 3688 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3689 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3690 } 3691 3692 /* adapt root data with cumulative */ 3693 if (first_found) { 3694 PetscInt old_index; 3695 3696 root_data[first_index] += start; 3697 old_index = first_index; 3698 for (i=first_index+1;i<Nl;i++) { 3699 if (root_data[i]) { 3700 root_data[i] += root_data[old_index]; 3701 old_index = i; 3702 } 3703 } 3704 } 3705 3706 /* from roots to leaves */ 3707 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3708 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3709 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3710 3711 /* create new IS with global indexes without holes */ 3712 if (subset_mult) { 3713 const PetscInt* idxs_mult; 3714 PetscInt cum; 3715 3716 cum = 0; 3717 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3718 for (i=0;i<n;i++) { 3719 PetscInt j; 3720 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3721 } 3722 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3723 } else { 3724 for (i=0;i<n;i++) { 3725 gidxs[i] = leaf_data[i]-1; 3726 } 3727 } 3728 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3729 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3730 PetscFunctionReturn(0); 3731 } 3732 3733 #undef __FUNCT__ 3734 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3735 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3736 { 3737 PetscInt i,j; 3738 PetscScalar *alphas; 3739 PetscErrorCode ierr; 3740 3741 PetscFunctionBegin; 3742 /* this implements stabilized Gram-Schmidt */ 3743 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3744 for (i=0;i<n;i++) { 3745 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3746 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3747 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3748 } 3749 ierr = PetscFree(alphas);CHKERRQ(ierr); 3750 PetscFunctionReturn(0); 3751 } 3752 3753 #undef __FUNCT__ 3754 #define __FUNCT__ "MatISGetSubassemblingPattern" 3755 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3756 { 3757 IS ranks_send_to; 3758 PetscInt n_neighs,*neighs,*n_shared,**shared; 3759 PetscMPIInt size,rank,color; 3760 PetscInt *xadj,*adjncy; 3761 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3762 PetscInt i,local_size,threshold=0; 3763 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3764 PetscSubcomm subcomm; 3765 PetscErrorCode ierr; 3766 3767 PetscFunctionBegin; 3768 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3769 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3770 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3771 3772 /* Get info on mapping */ 3773 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3774 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3775 3776 /* build local CSR graph of subdomains' connectivity */ 3777 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3778 xadj[0] = 0; 3779 xadj[1] = PetscMax(n_neighs-1,0); 3780 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3781 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3782 3783 if (threshold) { 3784 PetscInt xadj_count = 0; 3785 for (i=1;i<n_neighs;i++) { 3786 if (n_shared[i] > threshold) { 3787 adjncy[xadj_count] = neighs[i]; 3788 adjncy_wgt[xadj_count] = n_shared[i]; 3789 xadj_count++; 3790 } 3791 } 3792 xadj[1] = xadj_count; 3793 } else { 3794 if (xadj[1]) { 3795 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3796 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3797 } 3798 } 3799 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3800 if (use_square) { 3801 for (i=0;i<xadj[1];i++) { 3802 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3803 } 3804 } 3805 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3806 3807 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3808 3809 /* 3810 Restrict work on active processes only. 3811 */ 3812 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3813 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3814 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3815 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3816 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3817 if (color) { 3818 ierr = PetscFree(xadj);CHKERRQ(ierr); 3819 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3820 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3821 } else { 3822 Mat subdomain_adj; 3823 IS new_ranks,new_ranks_contig; 3824 MatPartitioning partitioner; 3825 PetscInt prank,rstart=0,rend=0; 3826 PetscInt *is_indices,*oldranks; 3827 PetscBool aggregate; 3828 3829 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3830 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3831 prank = rank; 3832 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3833 /* 3834 for (i=0;i<size;i++) { 3835 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3836 } 3837 */ 3838 for (i=0;i<xadj[1];i++) { 3839 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3840 } 3841 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3842 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3843 if (aggregate) { 3844 PetscInt lrows,row,ncols,*cols; 3845 PetscMPIInt nrank; 3846 PetscScalar *vals; 3847 3848 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3849 lrows = 0; 3850 if (nrank<redprocs) { 3851 lrows = size/redprocs; 3852 if (nrank<size%redprocs) lrows++; 3853 } 3854 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3855 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3856 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3857 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3858 row = nrank; 3859 ncols = xadj[1]-xadj[0]; 3860 cols = adjncy; 3861 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3862 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3863 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3864 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 ierr = PetscFree(xadj);CHKERRQ(ierr); 3867 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3868 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3869 ierr = PetscFree(vals);CHKERRQ(ierr); 3870 } else { 3871 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3872 } 3873 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3874 3875 /* Partition */ 3876 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3877 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3878 if (use_vwgt) { 3879 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3880 v_wgt[0] = local_size; 3881 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3882 } 3883 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3884 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3885 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3886 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3887 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3888 3889 /* renumber new_ranks to avoid "holes" in new set of processors */ 3890 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3891 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3892 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3893 if (!redprocs) { 3894 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3895 } else { 3896 PetscInt idxs[1]; 3897 PetscMPIInt tag; 3898 MPI_Request *reqs; 3899 3900 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3901 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3902 for (i=rstart;i<rend;i++) { 3903 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3904 } 3905 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3906 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3907 ierr = PetscFree(reqs);CHKERRQ(ierr); 3908 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3909 } 3910 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3911 /* clean up */ 3912 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3913 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3914 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3915 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3916 } 3917 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3918 3919 /* assemble parallel IS for sends */ 3920 i = 1; 3921 if (color) i=0; 3922 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3923 /* get back IS */ 3924 *is_sends = ranks_send_to; 3925 PetscFunctionReturn(0); 3926 } 3927 3928 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3929 3930 #undef __FUNCT__ 3931 #define __FUNCT__ "MatISSubassemble" 3932 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3933 { 3934 Mat local_mat; 3935 IS is_sends_internal; 3936 PetscInt rows,cols,new_local_rows; 3937 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3938 PetscBool ismatis,isdense,newisdense,destroy_mat; 3939 ISLocalToGlobalMapping l2gmap; 3940 PetscInt* l2gmap_indices; 3941 const PetscInt* is_indices; 3942 MatType new_local_type; 3943 /* buffers */ 3944 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3945 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3946 PetscInt *recv_buffer_idxs_local; 3947 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3948 /* MPI */ 3949 MPI_Comm comm,comm_n; 3950 PetscSubcomm subcomm; 3951 PetscMPIInt n_sends,n_recvs,commsize; 3952 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3953 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3954 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3955 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3956 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3957 PetscErrorCode ierr; 3958 3959 PetscFunctionBegin; 3960 /* TODO: add missing checks */ 3961 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3962 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3963 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3964 PetscValidLogicalCollectiveInt(mat,nis,7); 3965 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3966 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3967 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3968 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3969 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3970 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3971 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3972 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3973 PetscInt mrows,mcols,mnrows,mncols; 3974 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3975 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3976 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3977 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3978 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3979 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3980 } 3981 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3982 PetscValidLogicalCollectiveInt(mat,bs,0); 3983 /* prepare IS for sending if not provided */ 3984 if (!is_sends) { 3985 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3986 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3987 } else { 3988 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3989 is_sends_internal = is_sends; 3990 } 3991 3992 /* get comm */ 3993 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3994 3995 /* compute number of sends */ 3996 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3997 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3998 3999 /* compute number of receives */ 4000 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4001 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4002 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4003 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4004 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4005 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4006 ierr = PetscFree(iflags);CHKERRQ(ierr); 4007 4008 /* restrict comm if requested */ 4009 subcomm = 0; 4010 destroy_mat = PETSC_FALSE; 4011 if (restrict_comm) { 4012 PetscMPIInt color,subcommsize; 4013 4014 color = 0; 4015 if (restrict_full) { 4016 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4017 } else { 4018 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4019 } 4020 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4021 subcommsize = commsize - subcommsize; 4022 /* check if reuse has been requested */ 4023 if (reuse == MAT_REUSE_MATRIX) { 4024 if (*mat_n) { 4025 PetscMPIInt subcommsize2; 4026 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4027 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4028 comm_n = PetscObjectComm((PetscObject)*mat_n); 4029 } else { 4030 comm_n = PETSC_COMM_SELF; 4031 } 4032 } else { /* MAT_INITIAL_MATRIX */ 4033 PetscMPIInt rank; 4034 4035 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4036 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4037 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4038 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4039 comm_n = PetscSubcommChild(subcomm); 4040 } 4041 /* flag to destroy *mat_n if not significative */ 4042 if (color) destroy_mat = PETSC_TRUE; 4043 } else { 4044 comm_n = comm; 4045 } 4046 4047 /* prepare send/receive buffers */ 4048 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4049 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4050 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4051 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4052 if (nis) { 4053 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4054 } 4055 4056 /* Get data from local matrices */ 4057 if (!isdense) { 4058 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4059 /* TODO: See below some guidelines on how to prepare the local buffers */ 4060 /* 4061 send_buffer_vals should contain the raw values of the local matrix 4062 send_buffer_idxs should contain: 4063 - MatType_PRIVATE type 4064 - PetscInt size_of_l2gmap 4065 - PetscInt global_row_indices[size_of_l2gmap] 4066 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4067 */ 4068 } else { 4069 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4070 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4071 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4072 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4073 send_buffer_idxs[1] = i; 4074 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4075 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4076 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4077 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4078 for (i=0;i<n_sends;i++) { 4079 ilengths_vals[is_indices[i]] = len*len; 4080 ilengths_idxs[is_indices[i]] = len+2; 4081 } 4082 } 4083 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4084 /* additional is (if any) */ 4085 if (nis) { 4086 PetscMPIInt psum; 4087 PetscInt j; 4088 for (j=0,psum=0;j<nis;j++) { 4089 PetscInt plen; 4090 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4091 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4092 psum += len+1; /* indices + lenght */ 4093 } 4094 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4095 for (j=0,psum=0;j<nis;j++) { 4096 PetscInt plen; 4097 const PetscInt *is_array_idxs; 4098 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4099 send_buffer_idxs_is[psum] = plen; 4100 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4101 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4102 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4103 psum += plen+1; /* indices + lenght */ 4104 } 4105 for (i=0;i<n_sends;i++) { 4106 ilengths_idxs_is[is_indices[i]] = psum; 4107 } 4108 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4109 } 4110 4111 buf_size_idxs = 0; 4112 buf_size_vals = 0; 4113 buf_size_idxs_is = 0; 4114 for (i=0;i<n_recvs;i++) { 4115 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4116 buf_size_vals += (PetscInt)olengths_vals[i]; 4117 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4118 } 4119 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4120 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4121 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4122 4123 /* get new tags for clean communications */ 4124 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4125 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4126 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4127 4128 /* allocate for requests */ 4129 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4130 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4131 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4132 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4133 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4134 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4135 4136 /* communications */ 4137 ptr_idxs = recv_buffer_idxs; 4138 ptr_vals = recv_buffer_vals; 4139 ptr_idxs_is = recv_buffer_idxs_is; 4140 for (i=0;i<n_recvs;i++) { 4141 source_dest = onodes[i]; 4142 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4143 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4144 ptr_idxs += olengths_idxs[i]; 4145 ptr_vals += olengths_vals[i]; 4146 if (nis) { 4147 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4148 ptr_idxs_is += olengths_idxs_is[i]; 4149 } 4150 } 4151 for (i=0;i<n_sends;i++) { 4152 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4153 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4154 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4155 if (nis) { 4156 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4157 } 4158 } 4159 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4160 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4161 4162 /* assemble new l2g map */ 4163 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4164 ptr_idxs = recv_buffer_idxs; 4165 new_local_rows = 0; 4166 for (i=0;i<n_recvs;i++) { 4167 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4168 ptr_idxs += olengths_idxs[i]; 4169 } 4170 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4171 ptr_idxs = recv_buffer_idxs; 4172 new_local_rows = 0; 4173 for (i=0;i<n_recvs;i++) { 4174 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4175 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4176 ptr_idxs += olengths_idxs[i]; 4177 } 4178 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4179 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4180 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4181 4182 /* infer new local matrix type from received local matrices type */ 4183 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4184 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4185 if (n_recvs) { 4186 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4187 ptr_idxs = recv_buffer_idxs; 4188 for (i=0;i<n_recvs;i++) { 4189 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4190 new_local_type_private = MATAIJ_PRIVATE; 4191 break; 4192 } 4193 ptr_idxs += olengths_idxs[i]; 4194 } 4195 switch (new_local_type_private) { 4196 case MATDENSE_PRIVATE: 4197 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4198 new_local_type = MATSEQAIJ; 4199 bs = 1; 4200 } else { /* if I receive only 1 dense matrix */ 4201 new_local_type = MATSEQDENSE; 4202 bs = 1; 4203 } 4204 break; 4205 case MATAIJ_PRIVATE: 4206 new_local_type = MATSEQAIJ; 4207 bs = 1; 4208 break; 4209 case MATBAIJ_PRIVATE: 4210 new_local_type = MATSEQBAIJ; 4211 break; 4212 case MATSBAIJ_PRIVATE: 4213 new_local_type = MATSEQSBAIJ; 4214 break; 4215 default: 4216 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4217 break; 4218 } 4219 } else { /* by default, new_local_type is seqdense */ 4220 new_local_type = MATSEQDENSE; 4221 bs = 1; 4222 } 4223 4224 /* create MATIS object if needed */ 4225 if (reuse == MAT_INITIAL_MATRIX) { 4226 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4227 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4228 } else { 4229 /* it also destroys the local matrices */ 4230 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4231 } 4232 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4233 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4234 4235 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4236 4237 /* Global to local map of received indices */ 4238 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4239 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4240 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4241 4242 /* restore attributes -> type of incoming data and its size */ 4243 buf_size_idxs = 0; 4244 for (i=0;i<n_recvs;i++) { 4245 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4246 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4247 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4248 } 4249 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4250 4251 /* set preallocation */ 4252 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4253 if (!newisdense) { 4254 PetscInt *new_local_nnz=0; 4255 4256 ptr_vals = recv_buffer_vals; 4257 ptr_idxs = recv_buffer_idxs_local; 4258 if (n_recvs) { 4259 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4260 } 4261 for (i=0;i<n_recvs;i++) { 4262 PetscInt j; 4263 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4264 for (j=0;j<*(ptr_idxs+1);j++) { 4265 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4266 } 4267 } else { 4268 /* TODO */ 4269 } 4270 ptr_idxs += olengths_idxs[i]; 4271 } 4272 if (new_local_nnz) { 4273 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4274 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4275 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4276 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4277 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4278 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4279 } else { 4280 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4281 } 4282 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4283 } else { 4284 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4285 } 4286 4287 /* set values */ 4288 ptr_vals = recv_buffer_vals; 4289 ptr_idxs = recv_buffer_idxs_local; 4290 for (i=0;i<n_recvs;i++) { 4291 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4292 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4293 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4294 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4295 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4296 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4297 } else { 4298 /* TODO */ 4299 } 4300 ptr_idxs += olengths_idxs[i]; 4301 ptr_vals += olengths_vals[i]; 4302 } 4303 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4304 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4305 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4306 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4307 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4308 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4309 4310 #if 0 4311 if (!restrict_comm) { /* check */ 4312 Vec lvec,rvec; 4313 PetscReal infty_error; 4314 4315 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4316 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4317 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4318 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4319 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4320 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4321 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4322 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4323 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4324 } 4325 #endif 4326 4327 /* assemble new additional is (if any) */ 4328 if (nis) { 4329 PetscInt **temp_idxs,*count_is,j,psum; 4330 4331 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4332 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4333 ptr_idxs = recv_buffer_idxs_is; 4334 psum = 0; 4335 for (i=0;i<n_recvs;i++) { 4336 for (j=0;j<nis;j++) { 4337 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4338 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4339 psum += plen; 4340 ptr_idxs += plen+1; /* shift pointer to received data */ 4341 } 4342 } 4343 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4344 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4345 for (i=1;i<nis;i++) { 4346 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4347 } 4348 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4349 ptr_idxs = recv_buffer_idxs_is; 4350 for (i=0;i<n_recvs;i++) { 4351 for (j=0;j<nis;j++) { 4352 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4353 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4354 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4355 ptr_idxs += plen+1; /* shift pointer to received data */ 4356 } 4357 } 4358 for (i=0;i<nis;i++) { 4359 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4360 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4361 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4362 } 4363 ierr = PetscFree(count_is);CHKERRQ(ierr); 4364 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4365 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4366 } 4367 /* free workspace */ 4368 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4369 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4370 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4371 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4372 if (isdense) { 4373 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4374 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4375 } else { 4376 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4377 } 4378 if (nis) { 4379 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4380 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4381 } 4382 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4383 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4384 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4385 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4386 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4387 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4388 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4389 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4390 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4391 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4392 ierr = PetscFree(onodes);CHKERRQ(ierr); 4393 if (nis) { 4394 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4395 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4396 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4397 } 4398 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4399 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4400 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4401 for (i=0;i<nis;i++) { 4402 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4403 } 4404 *mat_n = NULL; 4405 } 4406 PetscFunctionReturn(0); 4407 } 4408 4409 /* temporary hack into ksp private data structure */ 4410 #include <petsc/private/kspimpl.h> 4411 4412 #undef __FUNCT__ 4413 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4414 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4415 { 4416 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4417 PC_IS *pcis = (PC_IS*)pc->data; 4418 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4419 MatNullSpace CoarseNullSpace=NULL; 4420 ISLocalToGlobalMapping coarse_islg; 4421 IS coarse_is,*isarray; 4422 PetscInt i,im_active=-1,active_procs=-1; 4423 PetscInt nis,nisdofs,nisneu; 4424 PC pc_temp; 4425 PCType coarse_pc_type; 4426 KSPType coarse_ksp_type; 4427 PetscBool multilevel_requested,multilevel_allowed; 4428 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4429 Mat t_coarse_mat_is; 4430 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4431 PetscMPIInt all_procs; 4432 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4433 PetscBool compute_vecs = PETSC_FALSE; 4434 PetscScalar *array; 4435 PetscErrorCode ierr; 4436 4437 PetscFunctionBegin; 4438 /* Assign global numbering to coarse dofs */ 4439 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4440 PetscInt ocoarse_size; 4441 compute_vecs = PETSC_TRUE; 4442 ocoarse_size = pcbddc->coarse_size; 4443 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4444 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4445 /* see if we can avoid some work */ 4446 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4447 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4448 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4449 PC pc; 4450 PetscBool isbddc; 4451 4452 /* temporary workaround since PCBDDC does not have a reset method so far */ 4453 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4454 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4455 if (isbddc) { 4456 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4457 } 4458 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4459 coarse_reuse = PETSC_FALSE; 4460 } else { /* we can safely reuse already computed coarse matrix */ 4461 coarse_reuse = PETSC_TRUE; 4462 } 4463 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4464 coarse_reuse = PETSC_FALSE; 4465 } 4466 /* reset any subassembling information */ 4467 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4468 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4469 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4470 coarse_reuse = PETSC_TRUE; 4471 } 4472 4473 /* count "active" (i.e. with positive local size) and "void" processes */ 4474 im_active = !!(pcis->n); 4475 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4476 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4477 void_procs = all_procs-active_procs; 4478 csin_type_simple = PETSC_TRUE; 4479 redist = PETSC_FALSE; 4480 if (pcbddc->current_level && void_procs) { 4481 csin_ml = PETSC_TRUE; 4482 ncoarse_ml = void_procs; 4483 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4484 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4485 csin_ds = PETSC_TRUE; 4486 ncoarse_ds = pcbddc->redistribute_coarse; 4487 redist = PETSC_TRUE; 4488 } else { 4489 csin_ds = PETSC_TRUE; 4490 ncoarse_ds = active_procs; 4491 redist = PETSC_TRUE; 4492 } 4493 } else { 4494 csin_ml = PETSC_FALSE; 4495 ncoarse_ml = all_procs; 4496 if (void_procs) { 4497 csin_ds = PETSC_TRUE; 4498 ncoarse_ds = void_procs; 4499 csin_type_simple = PETSC_FALSE; 4500 } else { 4501 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4502 csin_ds = PETSC_TRUE; 4503 ncoarse_ds = pcbddc->redistribute_coarse; 4504 redist = PETSC_TRUE; 4505 } else { 4506 csin_ds = PETSC_FALSE; 4507 ncoarse_ds = all_procs; 4508 } 4509 } 4510 } 4511 4512 /* 4513 test if we can go multilevel: three conditions must be satisfied: 4514 - we have not exceeded the number of levels requested 4515 - we can actually subassemble the active processes 4516 - we can find a suitable number of MPI processes where we can place the subassembled problem 4517 */ 4518 multilevel_allowed = PETSC_FALSE; 4519 multilevel_requested = PETSC_FALSE; 4520 if (pcbddc->current_level < pcbddc->max_levels) { 4521 multilevel_requested = PETSC_TRUE; 4522 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4523 multilevel_allowed = PETSC_FALSE; 4524 } else { 4525 multilevel_allowed = PETSC_TRUE; 4526 } 4527 } 4528 /* determine number of process partecipating to coarse solver */ 4529 if (multilevel_allowed) { 4530 ncoarse = ncoarse_ml; 4531 csin = csin_ml; 4532 redist = PETSC_FALSE; 4533 } else { 4534 ncoarse = ncoarse_ds; 4535 csin = csin_ds; 4536 } 4537 4538 /* creates temporary l2gmap and IS for coarse indexes */ 4539 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4540 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4541 4542 /* creates temporary MATIS object for coarse matrix */ 4543 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4544 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4545 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4546 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4547 #if 0 4548 { 4549 PetscViewer viewer; 4550 char filename[256]; 4551 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4552 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4553 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4554 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4555 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4556 } 4557 #endif 4558 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4559 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4560 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4561 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4562 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4563 4564 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4565 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4566 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4567 const PetscInt *idxs; 4568 ISLocalToGlobalMapping tmap; 4569 4570 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4571 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4572 /* allocate space for temporary storage */ 4573 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4574 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4575 /* allocate for IS array */ 4576 nisdofs = pcbddc->n_ISForDofsLocal; 4577 nisneu = !!pcbddc->NeumannBoundariesLocal; 4578 nis = nisdofs + nisneu; 4579 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4580 /* dofs splitting */ 4581 for (i=0;i<nisdofs;i++) { 4582 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4583 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4584 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4585 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4586 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4587 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4588 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4589 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4590 } 4591 /* neumann boundaries */ 4592 if (pcbddc->NeumannBoundariesLocal) { 4593 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4594 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4595 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4596 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4597 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4598 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4599 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4600 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4601 } 4602 /* free memory */ 4603 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4604 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4605 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4606 } else { 4607 nis = 0; 4608 nisdofs = 0; 4609 nisneu = 0; 4610 isarray = NULL; 4611 } 4612 /* destroy no longer needed map */ 4613 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4614 4615 /* restrict on coarse candidates (if needed) */ 4616 coarse_mat_is = NULL; 4617 if (csin) { 4618 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4619 if (redist) { 4620 PetscMPIInt rank; 4621 PetscInt spc,n_spc_p1,dest[1],destsize; 4622 4623 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4624 spc = active_procs/ncoarse; 4625 n_spc_p1 = active_procs%ncoarse; 4626 if (im_active) { 4627 destsize = 1; 4628 if (rank > n_spc_p1*(spc+1)-1) { 4629 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4630 } else { 4631 dest[0] = rank/(spc+1); 4632 } 4633 } else { 4634 destsize = 0; 4635 } 4636 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4637 } else if (csin_type_simple) { 4638 PetscMPIInt rank; 4639 PetscInt issize,isidx; 4640 4641 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4642 if (im_active) { 4643 issize = 1; 4644 isidx = (PetscInt)rank; 4645 } else { 4646 issize = 0; 4647 isidx = -1; 4648 } 4649 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4650 } else { /* get a suitable subassembling pattern from MATIS code */ 4651 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4652 } 4653 4654 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4655 if (!redist || ncoarse <= void_procs) { 4656 PetscInt ncoarse_cand,tissize,*nisindices; 4657 PetscInt *coarse_candidates; 4658 const PetscInt* tisindices; 4659 4660 /* get coarse candidates' ranks in pc communicator */ 4661 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4662 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4663 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4664 if (!coarse_candidates[i]) { 4665 coarse_candidates[ncoarse_cand++]=i; 4666 } 4667 } 4668 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4669 4670 4671 if (pcbddc->dbg_flag) { 4672 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4673 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4674 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4675 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4676 for (i=0;i<ncoarse_cand;i++) { 4677 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4678 } 4679 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4680 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4681 } 4682 /* shift the pattern on coarse candidates */ 4683 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4684 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4685 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4686 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4687 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4688 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4689 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4690 } 4691 if (pcbddc->dbg_flag) { 4692 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4693 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4694 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4696 } 4697 } 4698 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4699 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4700 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4701 } else { /* this is the last level, so use just receiving processes in subcomm */ 4702 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4703 } 4704 } else { 4705 if (pcbddc->dbg_flag) { 4706 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4707 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4708 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4709 } 4710 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4711 coarse_mat_is = t_coarse_mat_is; 4712 } 4713 4714 /* create local to global scatters for coarse problem */ 4715 if (compute_vecs) { 4716 PetscInt lrows; 4717 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4718 if (coarse_mat_is) { 4719 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4720 } else { 4721 lrows = 0; 4722 } 4723 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4724 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4725 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4726 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4727 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4728 } 4729 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4730 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4731 4732 /* set defaults for coarse KSP and PC */ 4733 if (multilevel_allowed) { 4734 coarse_ksp_type = KSPRICHARDSON; 4735 coarse_pc_type = PCBDDC; 4736 } else { 4737 coarse_ksp_type = KSPPREONLY; 4738 coarse_pc_type = PCREDUNDANT; 4739 } 4740 4741 /* print some info if requested */ 4742 if (pcbddc->dbg_flag) { 4743 if (!multilevel_allowed) { 4744 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4745 if (multilevel_requested) { 4746 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4747 } else if (pcbddc->max_levels) { 4748 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4749 } 4750 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4751 } 4752 } 4753 4754 /* create the coarse KSP object only once with defaults */ 4755 if (coarse_mat_is) { 4756 MatReuse coarse_mat_reuse; 4757 PetscViewer dbg_viewer = NULL; 4758 if (pcbddc->dbg_flag) { 4759 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4760 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4761 } 4762 if (!pcbddc->coarse_ksp) { 4763 char prefix[256],str_level[16]; 4764 size_t len; 4765 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4766 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4767 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4768 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4769 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4770 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4771 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4772 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4773 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4774 /* prefix */ 4775 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4776 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4777 if (!pcbddc->current_level) { 4778 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4779 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4780 } else { 4781 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4782 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4783 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4784 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4785 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4786 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4787 } 4788 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4789 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4790 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4791 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4792 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4793 /* allow user customization */ 4794 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4795 } 4796 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4797 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4798 if (nisdofs) { 4799 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4800 for (i=0;i<nisdofs;i++) { 4801 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4802 } 4803 } 4804 if (nisneu) { 4805 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4806 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4807 } 4808 4809 /* get some info after set from options */ 4810 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4811 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4812 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4813 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4814 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4815 isbddc = PETSC_FALSE; 4816 } 4817 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4818 if (isredundant) { 4819 KSP inner_ksp; 4820 PC inner_pc; 4821 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4822 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4823 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4824 } 4825 4826 /* assemble coarse matrix */ 4827 if (coarse_reuse) { 4828 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4829 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4830 coarse_mat_reuse = MAT_REUSE_MATRIX; 4831 } else { 4832 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4833 } 4834 if (isbddc || isnn) { 4835 if (pcbddc->coarsening_ratio > 1) { 4836 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4837 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4838 if (pcbddc->dbg_flag) { 4839 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4840 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4841 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4842 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4843 } 4844 } 4845 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4846 } else { 4847 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4848 coarse_mat = coarse_mat_is; 4849 } 4850 } else { 4851 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4852 } 4853 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4854 4855 /* propagate symmetry info of coarse matrix */ 4856 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4857 if (pc->pmat->symmetric_set) { 4858 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4859 } 4860 if (pc->pmat->hermitian_set) { 4861 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4862 } 4863 if (pc->pmat->spd_set) { 4864 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4865 } 4866 /* set operators */ 4867 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4868 if (pcbddc->dbg_flag) { 4869 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4870 } 4871 } else { /* processes non partecipating to coarse solver (if any) */ 4872 coarse_mat = 0; 4873 } 4874 ierr = PetscFree(isarray);CHKERRQ(ierr); 4875 #if 0 4876 { 4877 PetscViewer viewer; 4878 char filename[256]; 4879 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4880 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4881 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4882 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4883 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4884 } 4885 #endif 4886 4887 /* Compute coarse null space (special handling by BDDC only) */ 4888 #if 0 4889 if (pcbddc->NullSpace) { 4890 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4891 } 4892 #endif 4893 4894 if (pcbddc->coarse_ksp) { 4895 Vec crhs,csol; 4896 PetscBool ispreonly; 4897 4898 if (CoarseNullSpace) { 4899 if (isbddc) { 4900 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4901 } else { 4902 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4903 } 4904 } 4905 /* setup coarse ksp */ 4906 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4907 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4908 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4909 /* hack */ 4910 if (!csol) { 4911 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4912 } 4913 if (!crhs) { 4914 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4915 } 4916 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4917 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4918 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4919 KSP check_ksp; 4920 KSPType check_ksp_type; 4921 PC check_pc; 4922 Vec check_vec,coarse_vec; 4923 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4924 PetscInt its; 4925 PetscBool compute_eigs; 4926 PetscReal *eigs_r,*eigs_c; 4927 PetscInt neigs; 4928 const char *prefix; 4929 4930 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4931 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4932 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4933 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4934 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4935 if (ispreonly) { 4936 check_ksp_type = KSPPREONLY; 4937 compute_eigs = PETSC_FALSE; 4938 } else { 4939 check_ksp_type = KSPGMRES; 4940 compute_eigs = PETSC_TRUE; 4941 } 4942 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4943 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4944 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4945 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4946 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4947 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4948 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4949 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4950 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4951 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4952 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4953 /* create random vec */ 4954 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4955 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4956 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4957 if (CoarseNullSpace) { 4958 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4959 } 4960 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4961 /* solve coarse problem */ 4962 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4963 if (CoarseNullSpace) { 4964 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4965 } 4966 /* set eigenvalue estimation if preonly has not been requested */ 4967 if (compute_eigs) { 4968 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4969 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4970 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4971 lambda_max = eigs_r[neigs-1]; 4972 lambda_min = eigs_r[0]; 4973 if (pcbddc->use_coarse_estimates) { 4974 if (lambda_max>lambda_min) { 4975 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4976 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4977 } 4978 } 4979 } 4980 4981 /* check coarse problem residual error */ 4982 if (pcbddc->dbg_flag) { 4983 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4984 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4985 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4986 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4987 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4988 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4989 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4990 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4991 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4992 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4993 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4994 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4995 if (compute_eigs) { 4996 PetscReal lambda_max_s,lambda_min_s; 4997 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4998 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4999 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5000 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5001 for (i=0;i<neigs;i++) { 5002 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5003 } 5004 } 5005 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5006 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5007 } 5008 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5009 if (compute_eigs) { 5010 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5011 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5012 } 5013 } 5014 } 5015 /* print additional info */ 5016 if (pcbddc->dbg_flag) { 5017 /* waits until all processes reaches this point */ 5018 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5019 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5020 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5021 } 5022 5023 /* free memory */ 5024 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5025 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5026 PetscFunctionReturn(0); 5027 } 5028 5029 #undef __FUNCT__ 5030 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5031 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5032 { 5033 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5034 PC_IS* pcis = (PC_IS*)pc->data; 5035 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5036 IS subset,subset_mult,subset_n; 5037 PetscInt local_size,coarse_size=0; 5038 PetscInt *local_primal_indices=NULL; 5039 const PetscInt *t_local_primal_indices; 5040 PetscErrorCode ierr; 5041 5042 PetscFunctionBegin; 5043 /* Compute global number of coarse dofs */ 5044 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5045 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5046 } 5047 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5048 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5049 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5050 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5051 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5052 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5053 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5054 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5055 if (local_size != pcbddc->local_primal_size) { 5056 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5057 } 5058 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5059 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5060 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5061 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5062 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5063 5064 /* check numbering */ 5065 if (pcbddc->dbg_flag) { 5066 PetscScalar coarsesum,*array,*array2; 5067 PetscInt i; 5068 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5069 5070 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5071 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5072 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5073 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5074 /* counter */ 5075 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5076 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5077 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5078 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5079 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5080 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5081 5082 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5083 for (i=0;i<pcbddc->local_primal_size;i++) { 5084 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5085 } 5086 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5087 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5088 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5089 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5090 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5091 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5092 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5093 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5094 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5095 for (i=0;i<pcis->n;i++) { 5096 if (array[i] != 0.0 && array[i] != array2[i]) { 5097 PetscInt owned = (PetscInt)(array[i]); 5098 PetscInt neigh = (PetscInt)(array2[i]); 5099 set_error = PETSC_TRUE; 5100 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5101 } 5102 } 5103 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5104 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5106 for (i=0;i<pcis->n;i++) { 5107 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5108 } 5109 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5110 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5111 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5112 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5113 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5114 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5115 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5116 PetscInt *gidxs; 5117 5118 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5119 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5121 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5122 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5123 for (i=0;i<pcbddc->local_primal_size;i++) { 5124 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5125 } 5126 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5127 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5128 } 5129 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5130 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5131 } 5132 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5133 /* get back data */ 5134 *coarse_size_n = coarse_size; 5135 *local_primal_indices_n = local_primal_indices; 5136 PetscFunctionReturn(0); 5137 } 5138 5139 #undef __FUNCT__ 5140 #define __FUNCT__ "PCBDDCGlobalToLocal" 5141 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5142 { 5143 IS localis_t; 5144 PetscInt i,lsize,*idxs,n; 5145 PetscScalar *vals; 5146 PetscErrorCode ierr; 5147 5148 PetscFunctionBegin; 5149 /* get indices in local ordering exploiting local to global map */ 5150 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5151 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5152 for (i=0;i<lsize;i++) vals[i] = 1.0; 5153 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5154 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5155 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5156 if (idxs) { /* multilevel guard */ 5157 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5158 } 5159 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5160 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5161 ierr = PetscFree(vals);CHKERRQ(ierr); 5162 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5163 /* now compute set in local ordering */ 5164 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5165 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5166 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5167 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5168 for (i=0,lsize=0;i<n;i++) { 5169 if (PetscRealPart(vals[i]) > 0.5) { 5170 lsize++; 5171 } 5172 } 5173 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5174 for (i=0,lsize=0;i<n;i++) { 5175 if (PetscRealPart(vals[i]) > 0.5) { 5176 idxs[lsize++] = i; 5177 } 5178 } 5179 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5180 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5181 *localis = localis_t; 5182 PetscFunctionReturn(0); 5183 } 5184 5185 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5186 #undef __FUNCT__ 5187 #define __FUNCT__ "PCBDDCMatMult_Private" 5188 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5189 { 5190 PCBDDCChange_ctx change_ctx; 5191 PetscErrorCode ierr; 5192 5193 PetscFunctionBegin; 5194 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5195 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5196 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5197 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5198 PetscFunctionReturn(0); 5199 } 5200 5201 #undef __FUNCT__ 5202 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5203 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5204 { 5205 PCBDDCChange_ctx change_ctx; 5206 PetscErrorCode ierr; 5207 5208 PetscFunctionBegin; 5209 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5210 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5211 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5212 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5213 PetscFunctionReturn(0); 5214 } 5215 5216 #undef __FUNCT__ 5217 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5218 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5219 { 5220 PC_IS *pcis=(PC_IS*)pc->data; 5221 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5222 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5223 Mat S_j; 5224 PetscInt *used_xadj,*used_adjncy; 5225 PetscBool free_used_adj; 5226 PetscErrorCode ierr; 5227 5228 PetscFunctionBegin; 5229 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5230 free_used_adj = PETSC_FALSE; 5231 if (pcbddc->sub_schurs_layers == -1) { 5232 used_xadj = NULL; 5233 used_adjncy = NULL; 5234 } else { 5235 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5236 used_xadj = pcbddc->mat_graph->xadj; 5237 used_adjncy = pcbddc->mat_graph->adjncy; 5238 } else if (pcbddc->computed_rowadj) { 5239 used_xadj = pcbddc->mat_graph->xadj; 5240 used_adjncy = pcbddc->mat_graph->adjncy; 5241 } else { 5242 PetscBool flg_row=PETSC_FALSE; 5243 const PetscInt *xadj,*adjncy; 5244 PetscInt nvtxs; 5245 5246 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5247 if (flg_row) { 5248 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5249 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5250 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5251 free_used_adj = PETSC_TRUE; 5252 } else { 5253 pcbddc->sub_schurs_layers = -1; 5254 used_xadj = NULL; 5255 used_adjncy = NULL; 5256 } 5257 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5258 } 5259 } 5260 5261 /* setup sub_schurs data */ 5262 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5263 if (!sub_schurs->use_mumps) { 5264 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5265 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5266 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5267 } else { 5268 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5269 PetscBool isseqaij; 5270 if (!pcbddc->use_vertices && reuse_solvers) { 5271 PetscInt n_vertices; 5272 5273 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5274 reuse_solvers = (PetscBool)!n_vertices; 5275 } 5276 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5277 if (!isseqaij) { 5278 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5279 if (matis->A == pcbddc->local_mat) { 5280 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5281 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5282 } else { 5283 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5284 } 5285 } 5286 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5287 } 5288 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5289 5290 /* free adjacency */ 5291 if (free_used_adj) { 5292 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5293 } 5294 PetscFunctionReturn(0); 5295 } 5296 5297 #undef __FUNCT__ 5298 #define __FUNCT__ "PCBDDCInitSubSchurs" 5299 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5300 { 5301 PC_IS *pcis=(PC_IS*)pc->data; 5302 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5303 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5304 PCBDDCGraph graph; 5305 PetscErrorCode ierr; 5306 5307 PetscFunctionBegin; 5308 /* attach interface graph for determining subsets */ 5309 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5310 IS verticesIS,verticescomm; 5311 PetscInt vsize,*idxs; 5312 5313 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5314 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5315 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5316 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5317 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5318 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5319 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5320 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5321 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5322 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5323 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5324 /* 5325 if (pcbddc->dbg_flag) { 5326 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5327 } 5328 */ 5329 } else { 5330 graph = pcbddc->mat_graph; 5331 } 5332 5333 /* sub_schurs init */ 5334 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5335 5336 /* free graph struct */ 5337 if (pcbddc->sub_schurs_rebuild) { 5338 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5339 } 5340 PetscFunctionReturn(0); 5341 } 5342 5343 #undef __FUNCT__ 5344 #define __FUNCT__ "PCBDDCCheckOperator" 5345 PetscErrorCode PCBDDCCheckOperator(PC pc) 5346 { 5347 PC_IS *pcis=(PC_IS*)pc->data; 5348 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5349 PetscErrorCode ierr; 5350 5351 PetscFunctionBegin; 5352 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5353 IS zerodiag = NULL; 5354 Mat S_j,B0=NULL,B0_B=NULL; 5355 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5356 PetscScalar norm,p0_check,*array,*array2; 5357 PetscInt i; 5358 5359 /* B0 and B0_B */ 5360 if (zerodiag) { 5361 IS dummy; 5362 PetscInt ii[2]; 5363 5364 ii[0] = 0; 5365 ii[1] = pcbddc->B0_ncol; 5366 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 5367 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 5368 ierr = MatGetSubMatrix(B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5369 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5370 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5371 } 5372 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5373 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5374 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5375 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5376 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5377 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5378 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5379 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5380 /* S_j */ 5381 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5382 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5383 5384 /* mimic vector in \widetilde{W}_\Gamma */ 5385 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5386 /* continuous in primal space */ 5387 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5388 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5389 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5390 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5391 if (zerodiag) { 5392 p0_check = array[pcbddc->local_primal_size-1]; 5393 } else { 5394 p0_check = 0; 5395 } 5396 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5397 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5398 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5399 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5400 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5401 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5402 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5403 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5404 5405 /* assemble rhs for coarse problem */ 5406 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5407 /* local with Schur */ 5408 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5409 if (zerodiag) { 5410 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5411 array[0] = p0_check; 5412 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5413 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5414 } 5415 /* sum on primal nodes the local contributions */ 5416 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5417 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5418 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5419 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5420 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5421 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5422 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5423 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5424 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5425 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5426 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5427 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5428 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5429 /* scale primal nodes (BDDC sums contibutions) */ 5430 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5431 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5432 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5433 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5434 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5435 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5436 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5437 /* global: \widetilde{B0}_B w_\Gamma */ 5438 if (zerodiag) { 5439 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5440 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5441 pcbddc->benign_p0 = array[0]; 5442 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5443 } else { 5444 pcbddc->benign_p0 = 0.; 5445 } 5446 /* BDDC */ 5447 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5448 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5449 5450 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5451 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5452 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5453 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5454 if (pcbddc->benign_p0_lidx >= 0) { 5455 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0 error is %1.4e\n",PetscGlobalRank,PetscAbsScalar(pcbddc->benign_p0-p0_check)); 5456 } 5457 5458 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5459 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5460 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5461 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5462 ierr = MatDestroy(&B0);CHKERRQ(ierr); 5463 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5464 } 5465 PetscFunctionReturn(0); 5466 } 5467