1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0" 10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 if (!pcbddc->benign_sf) { 17 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 18 if (pcbddc->benign_p0_gidx >= 0) { 19 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 20 } else { 21 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 22 } 23 } 24 if (pop) { /* use SF to get values */ 25 PetscScalar *array; 26 27 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 28 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 29 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 30 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 31 } else { /* use VecSetValue */ 32 if (pcbddc->benign_p0_gidx >= 0) { 33 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 34 } 35 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 36 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 43 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 44 { 45 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 46 PetscErrorCode ierr; 47 48 PetscFunctionBegin; 49 /* TODO: add error checking 50 - avoid nested pop (or push) calls. 51 - cannot push before pop. 52 - cannot call this if pcbddc->local_mat is NULL 53 */ 54 if (pcbddc->benign_p0_lidx < 0) { 55 PetscFunctionReturn(0); 56 } 57 if (pop) { 58 const PetscInt *cB0_cols; 59 PetscInt cB0_ncol; 60 const PetscScalar *cB0_vals; 61 62 /* extract B_0 */ 63 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 64 pcbddc->B0_ncol = cB0_ncol; 65 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 66 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 67 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 68 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 70 /* remove rows and cols from local problem */ 71 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 72 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 73 } else { /* push */ 74 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 75 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 76 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 77 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 79 } 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "PCBDDCAdaptiveSelection" 85 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 86 { 87 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 88 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 89 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 90 PetscBLASInt *B_iwork,*B_ifail; 91 PetscScalar *work,lwork; 92 PetscScalar *St,*S,*eigv; 93 PetscScalar *Sarray,*Starray; 94 PetscReal *eigs,thresh; 95 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 96 PetscBool allocated_S_St; 97 #if defined(PETSC_USE_COMPLEX) 98 PetscReal *rwork; 99 #endif 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 if (!sub_schurs->use_mumps) { 104 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 105 } 106 107 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 108 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 109 } 110 111 if (pcbddc->dbg_flag) { 112 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 114 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 115 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 116 } 117 118 if (pcbddc->dbg_flag) { 119 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 120 } 121 122 /* max size of subsets */ 123 mss = 0; 124 for (i=0;i<sub_schurs->n_subs;i++) { 125 PetscInt subset_size; 126 127 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 128 mss = PetscMax(mss,subset_size); 129 } 130 131 /* min/max and threshold */ 132 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 133 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 134 nmax = PetscMax(nmin,nmax); 135 allocated_S_St = PETSC_FALSE; 136 if (nmin) { 137 allocated_S_St = PETSC_TRUE; 138 } 139 140 /* allocate lapack workspace */ 141 cum = cum2 = 0; 142 maxneigs = 0; 143 for (i=0;i<sub_schurs->n_subs;i++) { 144 PetscInt n,subset_size; 145 146 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 147 n = PetscMin(subset_size,nmax); 148 cum += subset_size; 149 cum2 += subset_size*n; 150 maxneigs = PetscMax(maxneigs,n); 151 } 152 if (mss) { 153 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 154 PetscBLASInt B_itype = 1; 155 PetscBLASInt B_N = mss; 156 PetscReal zero = 0.0; 157 PetscReal eps = 0.0; /* dlamch? */ 158 159 B_lwork = -1; 160 S = NULL; 161 St = NULL; 162 eigs = NULL; 163 eigv = NULL; 164 B_iwork = NULL; 165 B_ifail = NULL; 166 #if defined(PETSC_USE_COMPLEX) 167 rwork = NULL; 168 #endif 169 thresh = 1.0; 170 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 171 #if defined(PETSC_USE_COMPLEX) 172 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 173 #else 174 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 175 #endif 176 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 177 ierr = PetscFPTrapPop();CHKERRQ(ierr); 178 } else { 179 /* TODO */ 180 } 181 } else { 182 lwork = 0; 183 } 184 185 nv = 0; 186 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 187 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 188 } 189 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 190 if (allocated_S_St) { 191 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 192 } 193 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 194 #if defined(PETSC_USE_COMPLEX) 195 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 196 #endif 197 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 198 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 199 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 200 nv+cum,&pcbddc->adaptive_constraints_idxs, 201 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 202 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 203 204 maxneigs = 0; 205 cum = cum2 = cumarray = 0; 206 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 207 pcbddc->adaptive_constraints_data_ptr[0] = 0; 208 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 209 const PetscInt *idxs; 210 211 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 212 for (cum=0;cum<nv;cum++) { 213 pcbddc->adaptive_constraints_n[cum] = 1; 214 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 215 pcbddc->adaptive_constraints_data[cum] = 1.0; 216 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 217 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 218 } 219 cum2 = cum; 220 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 221 } 222 223 if (mss) { /* multilevel */ 224 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 225 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 226 } 227 228 for (i=0;i<sub_schurs->n_subs;i++) { 229 230 const PetscInt *idxs; 231 PetscReal infty = PETSC_MAX_REAL; 232 PetscInt j,subset_size,eigs_start = 0; 233 PetscBLASInt B_N; 234 PetscBool same_data = PETSC_FALSE; 235 236 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 237 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 238 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 239 if (sub_schurs->is_hermitian) { 240 PetscInt j,k; 241 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 242 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 244 } 245 for (j=0;j<subset_size;j++) { 246 for (k=j;k<subset_size;k++) { 247 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 248 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 249 } 250 } 251 } else { 252 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 } else { 256 S = Sarray + cumarray; 257 St = Starray + cumarray; 258 } 259 260 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 261 /* see if we can save some work */ 262 if (sub_schurs->n_subs == 1) { 263 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 264 } 265 266 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 267 B_neigs = 0; 268 } else { 269 /* Threshold: this is an heuristic for edges */ 270 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 271 272 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 273 PetscBLASInt B_itype = 1; 274 PetscBLASInt B_IL, B_IU; 275 PetscReal eps = -1.0; /* dlamch? */ 276 PetscInt nmin_s; 277 278 if (pcbddc->dbg_flag) { 279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 280 } 281 282 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 283 if (thresh > 1.+PETSC_SMALL) { 284 285 /* ask for eigenvalues larger than thresh */ 286 #if defined(PETSC_USE_COMPLEX) 287 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 288 #else 289 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 290 #endif 291 } else { 292 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 293 B_IL = 1; 294 #if defined(PETSC_USE_COMPLEX) 295 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 296 #else 297 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 298 #endif 299 } 300 ierr = PetscFPTrapPop();CHKERRQ(ierr); 301 if (B_ierr) { 302 if (B_ierr < 0 ) { 303 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 304 } else if (B_ierr <= B_N) { 305 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 306 } else { 307 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 308 } 309 } 310 311 if (B_neigs > nmax) { 312 if (pcbddc->dbg_flag) { 313 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 314 } 315 eigs_start = B_neigs -nmax; 316 B_neigs = nmax; 317 } 318 319 nmin_s = PetscMin(nmin,B_N); 320 if (B_neigs < nmin_s) { 321 PetscBLASInt B_neigs2; 322 323 B_IU = B_N - B_neigs; 324 B_IL = B_N - nmin_s + 1; 325 if (pcbddc->dbg_flag) { 326 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 327 } 328 if (sub_schurs->is_hermitian) { 329 PetscInt j; 330 for (j=0;j<subset_size;j++) { 331 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 332 } 333 for (j=0;j<subset_size;j++) { 334 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 } else { 337 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 338 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 339 } 340 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 341 #if defined(PETSC_USE_COMPLEX) 342 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 343 #else 344 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 345 #endif 346 ierr = PetscFPTrapPop();CHKERRQ(ierr); 347 B_neigs += B_neigs2; 348 } 349 if (B_ierr) { 350 if (B_ierr < 0 ) { 351 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 352 } else if (B_ierr <= B_N) { 353 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 354 } else { 355 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 356 } 357 } 358 if (pcbddc->dbg_flag) { 359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 360 for (j=0;j<B_neigs;j++) { 361 if (eigs[j] == 0.0) { 362 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 363 } else { 364 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 365 } 366 } 367 } 368 } else { 369 /* TODO */ 370 } 371 } 372 maxneigs = PetscMax(B_neigs,maxneigs); 373 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 374 if (B_neigs) { 375 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 376 377 if (pcbddc->dbg_flag > 1) { 378 PetscInt ii; 379 for (ii=0;ii<B_neigs;ii++) { 380 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 381 for (j=0;j<B_N;j++) { 382 #if defined(PETSC_USE_COMPLEX) 383 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 384 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 385 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 386 #else 387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 388 #endif 389 } 390 } 391 } 392 #if 0 393 for (j=0;j<B_neigs;j++) { 394 PetscBLASInt Blas_N,Blas_one = 1.0; 395 PetscScalar norm; 396 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 397 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 398 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 399 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 400 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 401 } else { 402 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 403 } 404 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 405 } 406 #endif 407 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 408 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 409 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 410 cum++; 411 } 412 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 413 /* shift for next computation */ 414 cumarray += subset_size*subset_size; 415 } 416 if (pcbddc->dbg_flag) { 417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 418 } 419 420 if (mss) { 421 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 422 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 423 /* destroy matrices (junk) */ 424 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 425 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 426 } 427 if (allocated_S_St) { 428 ierr = PetscFree2(S,St);CHKERRQ(ierr); 429 } 430 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 431 #if defined(PETSC_USE_COMPLEX) 432 ierr = PetscFree(rwork);CHKERRQ(ierr); 433 #endif 434 if (pcbddc->dbg_flag) { 435 PetscInt maxneigs_r; 436 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 437 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 438 } 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "PCBDDCSetUpSolvers" 444 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 445 { 446 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 447 PetscScalar *coarse_submat_vals; 448 PetscErrorCode ierr; 449 450 PetscFunctionBegin; 451 /* Setup local scatters R_to_B and (optionally) R_to_D */ 452 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 453 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 454 455 /* Setup local neumann solver ksp_R */ 456 /* PCBDDCSetUpLocalScatters should be called first! */ 457 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 458 459 /* Change global null space passed in by the user if change of basis has been requested */ 460 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 461 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 462 } 463 464 /* 465 Setup local correction and local part of coarse basis. 466 Gives back the dense local part of the coarse matrix in column major ordering 467 */ 468 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 469 470 /* Compute total number of coarse nodes and setup coarse solver */ 471 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 472 473 /* free */ 474 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "PCBDDCResetCustomization" 480 PetscErrorCode PCBDDCResetCustomization(PC pc) 481 { 482 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 483 PetscErrorCode ierr; 484 485 PetscFunctionBegin; 486 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 487 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 488 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 489 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 490 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 491 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 492 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 493 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 494 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 495 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 496 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "PCBDDCResetTopography" 502 PetscErrorCode PCBDDCResetTopography(PC pc) 503 { 504 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 510 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 511 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 512 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 513 PetscFunctionReturn(0); 514 } 515 516 #undef __FUNCT__ 517 #define __FUNCT__ "PCBDDCResetSolvers" 518 PetscErrorCode PCBDDCResetSolvers(PC pc) 519 { 520 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 521 PetscScalar *array; 522 PetscErrorCode ierr; 523 524 PetscFunctionBegin; 525 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 526 if (pcbddc->coarse_phi_B) { 527 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 528 ierr = PetscFree(array);CHKERRQ(ierr); 529 } 530 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 531 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 532 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 533 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 534 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 535 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 536 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 537 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 538 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 539 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 540 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 541 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 542 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 543 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 544 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 545 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 546 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 547 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 548 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 549 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 550 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 551 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 552 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 553 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 554 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 555 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 556 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 557 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 563 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 564 { 565 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 566 PC_IS *pcis = (PC_IS*)pc->data; 567 VecType impVecType; 568 PetscInt n_constraints,n_R,old_size,n_benign; 569 PetscErrorCode ierr; 570 571 PetscFunctionBegin; 572 if (!pcbddc->ConstraintMatrix) { 573 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 574 } 575 /* get sizes */ 576 n_benign = 0; 577 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 578 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 579 n_R = pcis->n - pcbddc->n_vertices; 580 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 581 /* local work vectors (try to avoid unneeded work)*/ 582 /* R nodes */ 583 old_size = -1; 584 if (pcbddc->vec1_R) { 585 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 586 } 587 if (n_R != old_size) { 588 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 589 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 590 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 591 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 592 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 593 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 594 } 595 /* local primal dofs */ 596 old_size = -1; 597 if (pcbddc->vec1_P) { 598 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 599 } 600 if (pcbddc->local_primal_size != old_size) { 601 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 602 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 603 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 604 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 605 } 606 /* local explicit constraints */ 607 old_size = -1; 608 if (pcbddc->vec1_C) { 609 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 610 } 611 if (n_constraints && n_constraints != old_size) { 612 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 613 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 614 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 615 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 616 } 617 PetscFunctionReturn(0); 618 } 619 620 #undef __FUNCT__ 621 #define __FUNCT__ "PCBDDCSetUpCorrection" 622 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 623 { 624 PetscErrorCode ierr; 625 /* pointers to pcis and pcbddc */ 626 PC_IS* pcis = (PC_IS*)pc->data; 627 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 628 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 629 /* submatrices of local problem */ 630 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 631 /* submatrices of benign trick */ 632 Mat B0_V = NULL; 633 /* submatrices of local coarse problem */ 634 Mat S_VV,S_CV,S_VC,S_CC; 635 /* working matrices */ 636 Mat C_CR; 637 /* additional working stuff */ 638 PC pc_R; 639 Mat F,B0 = NULL; 640 PetscBool isLU,isCHOL,isILU; 641 642 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 643 PetscScalar *work; 644 PetscInt *idx_V_B; 645 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 646 PetscInt i,n_R,n_D,n_B; 647 PetscBool unsymmetric_check; 648 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 649 MatType impMatType; 650 /* some shortcuts to scalars */ 651 PetscScalar one=1.0,m_one=-1.0; 652 653 PetscFunctionBegin; 654 n_benign = 0; 655 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 656 n_vertices = pcbddc->n_vertices; 657 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 658 /* Set Non-overlapping dimensions */ 659 n_B = pcis->n_B; 660 n_D = pcis->n - n_B; 661 n_R = pcis->n - n_vertices; 662 663 /* Set types for local objects needed by BDDC precondtioner */ 664 impMatType = MATSEQDENSE; 665 666 /* vertices in boundary numbering */ 667 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 668 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 669 if (i != n_vertices) { 670 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 671 } 672 673 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 674 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 675 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 676 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 677 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 678 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 679 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 680 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 681 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 682 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 683 684 unsymmetric_check = PETSC_FALSE; 685 /* allocate workspace */ 686 n = 0; 687 if (n_constraints) { 688 n += n_R*n_constraints; 689 } 690 if (n_vertices) { 691 n = PetscMax(2*n_R*n_vertices,n); 692 n = PetscMax((n_R+n_B)*n_vertices,n); 693 } 694 if (!pcbddc->symmetric_primal) { 695 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 696 unsymmetric_check = PETSC_TRUE; 697 } 698 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 699 700 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 701 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 702 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 703 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 704 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 705 if (isLU || isILU || isCHOL) { 706 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 707 } else if (sub_schurs->reuse_mumps) { 708 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 709 MatFactorType type; 710 711 F = reuse_mumps->F; 712 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 713 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 714 } else { 715 F = NULL; 716 } 717 718 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 719 if (n_constraints) { 720 Mat M1,M2,M3; 721 Mat auxmat; 722 IS is_aux; 723 PetscScalar *array,*array2; 724 725 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 727 728 /* Extract constraints on R nodes: C_{CR} */ 729 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 730 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 731 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 732 733 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 734 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 735 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 736 for (i=0;i<n_constraints;i++) { 737 const PetscScalar *row_cmat_values; 738 const PetscInt *row_cmat_indices; 739 PetscInt size_of_constraint,j; 740 741 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 742 for (j=0;j<size_of_constraint;j++) { 743 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 744 } 745 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 746 } 747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 748 if (F) { 749 Mat B; 750 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 752 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 753 ierr = MatDestroy(&B);CHKERRQ(ierr); 754 } else { 755 PetscScalar *marr; 756 757 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 758 for (i=0;i<n_constraints;i++) { 759 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 760 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 761 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 762 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 763 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 764 } 765 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 766 } 767 if (!pcbddc->switch_static) { 768 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 769 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 770 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 771 for (i=0;i<n_constraints;i++) { 772 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 773 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 774 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 776 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 777 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 778 } 779 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 780 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 781 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 782 } else { 783 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 784 pcbddc->local_auxmat2 = local_auxmat2_R; 785 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 786 } 787 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 788 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 789 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 790 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 791 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 792 if (isCHOL) { 793 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 794 } else { 795 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 796 } 797 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 798 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 799 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 800 ierr = MatDestroy(&M2);CHKERRQ(ierr); 801 ierr = MatDestroy(&M3);CHKERRQ(ierr); 802 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 803 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 804 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 805 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 806 ierr = MatDestroy(&M1);CHKERRQ(ierr); 807 } 808 /* Get submatrices from subdomain matrix */ 809 if (n_benign) { 810 IS dummy; 811 Mat B0_R; 812 PetscReal norm; 813 PetscInt ii[2]; 814 815 ii[0] = 0; 816 ii[1] = pcbddc->B0_ncol; 817 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 818 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 819 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 820 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 821 if (norm > PETSC_SMALL) { 822 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 823 } 824 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 825 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 826 } 827 828 if (n_vertices) { 829 IS is_aux; 830 831 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 832 IS tis; 833 834 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 835 ierr = ISSort(tis);CHKERRQ(ierr); 836 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 837 ierr = ISDestroy(&tis);CHKERRQ(ierr); 838 } else { 839 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 840 } 841 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 842 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 843 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 844 if (n_benign) { 845 IS dummy; 846 847 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 848 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 849 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 850 } 851 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 852 } 853 854 /* Matrix of coarse basis functions (local) */ 855 if (pcbddc->coarse_phi_B) { 856 PetscInt on_B,on_primal,on_D=n_D; 857 if (pcbddc->coarse_phi_D) { 858 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 859 } 860 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 861 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 862 PetscScalar *marray; 863 864 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 865 ierr = PetscFree(marray);CHKERRQ(ierr); 866 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 867 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 868 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 869 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 870 } 871 } 872 873 if (!pcbddc->coarse_phi_B) { 874 PetscScalar *marray; 875 876 n = n_B*pcbddc->local_primal_size; 877 if (pcbddc->switch_static || pcbddc->dbg_flag) { 878 n += n_D*pcbddc->local_primal_size; 879 } 880 if (!pcbddc->symmetric_primal) { 881 n *= 2; 882 } 883 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 884 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 885 n = n_B*pcbddc->local_primal_size; 886 if (pcbddc->switch_static || pcbddc->dbg_flag) { 887 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 888 n += n_D*pcbddc->local_primal_size; 889 } 890 if (!pcbddc->symmetric_primal) { 891 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 892 if (pcbddc->switch_static || pcbddc->dbg_flag) { 893 n = n_B*pcbddc->local_primal_size; 894 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 895 } 896 } else { 897 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 898 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 899 if (pcbddc->switch_static || pcbddc->dbg_flag) { 900 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 901 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 902 } 903 } 904 } 905 906 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 907 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 908 const PetscInt *idxs; 909 910 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 911 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 912 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 913 } 914 915 /* vertices */ 916 if (n_vertices) { 917 918 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 919 920 if (n_R) { 921 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 922 PetscBLASInt B_N,B_one = 1; 923 PetscScalar *x,*y; 924 PetscBool isseqaij; 925 926 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 927 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 928 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 929 if (F) { /* TODO could be optimized for symmetric problems */ 930 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 931 } else { 932 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 933 for (i=0;i<n_vertices;i++) { 934 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 935 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 936 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 937 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 938 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 939 } 940 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 941 } 942 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 943 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 944 if (n_constraints) { 945 Mat B; 946 947 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 948 for (i=0;i<n_vertices;i++) { 949 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 950 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 951 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 953 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 954 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 955 } 956 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 957 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 958 ierr = MatDestroy(&B);CHKERRQ(ierr); 959 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 960 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 961 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 962 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 963 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 964 ierr = MatDestroy(&B);CHKERRQ(ierr); 965 } 966 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 967 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 968 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 969 } 970 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 971 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 972 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 973 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 974 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 975 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 976 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 977 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 978 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 979 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 980 } else { 981 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 982 } 983 if (n_benign) { 984 const PetscScalar *vals; 985 const PetscInt *idxs; 986 PetscInt n,j; 987 988 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 989 for (j=0;j<n;j++) { 990 coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j]; 991 coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j]; 992 } 993 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 994 } 995 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 996 997 /* coarse basis functions */ 998 for (i=0;i<n_vertices;i++) { 999 PetscScalar *y; 1000 1001 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1002 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1003 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 y[n_B*i+idx_V_B[i]] = 1.0; 1007 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1008 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1009 1010 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1011 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1012 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1013 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1016 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1017 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1018 } 1019 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1020 } 1021 /* if n_R == 0 the object is not destroyed */ 1022 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1023 } 1024 1025 if (n_constraints) { 1026 Mat B; 1027 1028 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1029 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1030 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1031 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1032 if (n_vertices) { 1033 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1034 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1035 } else { 1036 Mat S_VCt; 1037 1038 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1039 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1040 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1041 } 1042 } 1043 ierr = MatDestroy(&B);CHKERRQ(ierr); 1044 /* coarse basis functions */ 1045 for (i=0;i<n_constraints;i++) { 1046 PetscScalar *y; 1047 1048 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1049 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1050 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1051 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1054 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1055 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1056 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1057 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1058 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1059 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1060 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1061 if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0; 1062 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1063 } 1064 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1065 } 1066 } 1067 if (n_constraints) { 1068 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1069 } 1070 1071 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1072 1073 /* compute other basis functions for non-symmetric problems */ 1074 if (!pcbddc->symmetric_primal) { 1075 1076 if (n_constraints) { 1077 Mat S_CCT,B_C; 1078 1079 /* this is a lazy thing */ 1080 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1081 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1082 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1083 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1084 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1085 if (n_vertices) { 1086 Mat B_V,S_VCT; 1087 1088 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1089 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1090 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1091 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1092 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1093 } 1094 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1095 } else { /* if there are no constraints, reset work */ 1096 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1097 } 1098 if (n_vertices && n_R) { 1099 Mat A_VRT; 1100 PetscScalar *marray; 1101 PetscBLASInt B_N,B_one = 1; 1102 1103 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1104 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1105 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1106 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1107 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1108 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1109 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1110 } 1111 1112 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1113 for (i=0;i<pcbddc->local_primal_size;i++) { 1114 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1115 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1116 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1117 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1118 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1119 } 1120 } else { 1121 for (i=0;i<pcbddc->local_primal_size;i++) { 1122 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1123 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1124 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1125 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1126 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1127 } 1128 } 1129 /* coarse basis functions */ 1130 for (i=0;i<pcbddc->local_primal_size;i++) { 1131 PetscScalar *y; 1132 1133 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1134 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1135 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1136 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1137 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138 if (i<n_vertices) { 1139 y[n_B*i+idx_V_B[i]] = 1.0; 1140 } 1141 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1142 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1143 1144 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1145 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1146 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1147 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1148 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1149 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1150 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1151 } 1152 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1153 } 1154 } 1155 /* free memory */ 1156 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1157 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1158 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1159 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1160 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1161 ierr = PetscFree(work);CHKERRQ(ierr); 1162 if (n_vertices) { 1163 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1164 } 1165 if (n_constraints) { 1166 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1167 } 1168 /* Checking coarse_sub_mat and coarse basis functios */ 1169 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1170 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1171 if (pcbddc->dbg_flag) { 1172 Mat coarse_sub_mat; 1173 Mat AUXMAT,TM1,TM2,TM3,TM4; 1174 Mat coarse_phi_D,coarse_phi_B; 1175 Mat coarse_psi_D,coarse_psi_B; 1176 Mat A_II,A_BB,A_IB,A_BI; 1177 Mat C_B,CPHI; 1178 IS is_dummy; 1179 Vec mones; 1180 MatType checkmattype=MATSEQAIJ; 1181 PetscReal real_value; 1182 1183 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1184 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1185 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1186 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1187 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1188 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1189 if (unsymmetric_check) { 1190 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1191 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1192 } 1193 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1194 1195 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1197 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1198 if (unsymmetric_check) { 1199 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1200 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1201 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1202 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1203 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1204 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1205 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1206 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1207 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1208 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1209 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1210 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1211 } else { 1212 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1213 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1214 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1215 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1216 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1217 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1218 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1219 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1220 } 1221 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1222 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1223 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1224 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1225 if (n_benign) { 1226 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1227 PetscScalar *data,*data2; 1228 1229 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1230 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1231 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1232 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1233 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1234 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1235 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1236 for (i=0;i<pcbddc->local_primal_size;i++) { 1237 data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i]; 1238 data[(i+1)*pcbddc->local_primal_size-1] += data2[i]; 1239 } 1240 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1241 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1242 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1243 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1244 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1245 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1246 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1247 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1248 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1249 } 1250 #if 0 1251 { 1252 PetscViewer viewer; 1253 char filename[256]; 1254 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1255 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1256 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1257 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1258 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1259 } 1260 #endif 1261 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1262 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1263 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1265 1266 /* check constraints */ 1267 if (!n_benign) { 1268 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1269 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1270 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1271 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1272 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1273 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1274 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1276 if (unsymmetric_check) { 1277 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1278 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1279 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1280 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1282 } 1283 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1284 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1285 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1286 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1287 } 1288 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1289 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1290 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1291 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1292 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1293 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1294 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1295 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1296 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1297 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1298 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1299 if (unsymmetric_check) { 1300 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1301 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1302 } 1303 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1304 } 1305 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1306 /* get back data */ 1307 *coarse_submat_vals_n = coarse_submat_vals; 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1313 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1314 { 1315 Mat *work_mat; 1316 IS isrow_s,iscol_s; 1317 PetscBool rsorted,csorted; 1318 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1319 PetscErrorCode ierr; 1320 1321 PetscFunctionBegin; 1322 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1323 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1324 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1325 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1326 1327 if (!rsorted) { 1328 const PetscInt *idxs; 1329 PetscInt *idxs_sorted,i; 1330 1331 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1332 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1333 for (i=0;i<rsize;i++) { 1334 idxs_perm_r[i] = i; 1335 } 1336 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1337 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1338 for (i=0;i<rsize;i++) { 1339 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1340 } 1341 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1342 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1343 } else { 1344 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1345 isrow_s = isrow; 1346 } 1347 1348 if (!csorted) { 1349 if (isrow == iscol) { 1350 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1351 iscol_s = isrow_s; 1352 } else { 1353 const PetscInt *idxs; 1354 PetscInt *idxs_sorted,i; 1355 1356 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1357 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1358 for (i=0;i<csize;i++) { 1359 idxs_perm_c[i] = i; 1360 } 1361 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1362 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1363 for (i=0;i<csize;i++) { 1364 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1365 } 1366 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1367 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1368 } 1369 } else { 1370 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1371 iscol_s = iscol; 1372 } 1373 1374 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1375 1376 if (!rsorted || !csorted) { 1377 Mat new_mat; 1378 IS is_perm_r,is_perm_c; 1379 1380 if (!rsorted) { 1381 PetscInt *idxs_r,i; 1382 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1383 for (i=0;i<rsize;i++) { 1384 idxs_r[idxs_perm_r[i]] = i; 1385 } 1386 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1387 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1388 } else { 1389 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1390 } 1391 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1392 1393 if (!csorted) { 1394 if (isrow_s == iscol_s) { 1395 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1396 is_perm_c = is_perm_r; 1397 } else { 1398 PetscInt *idxs_c,i; 1399 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1400 for (i=0;i<csize;i++) { 1401 idxs_c[idxs_perm_c[i]] = i; 1402 } 1403 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1404 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1405 } 1406 } else { 1407 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1408 } 1409 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1410 1411 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1412 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1413 work_mat[0] = new_mat; 1414 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1415 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1416 } 1417 1418 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1419 *B = work_mat[0]; 1420 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1421 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1422 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1428 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1429 { 1430 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1431 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1432 Mat new_mat; 1433 IS is_local,is_global; 1434 PetscInt local_size; 1435 PetscBool isseqaij; 1436 PetscErrorCode ierr; 1437 1438 PetscFunctionBegin; 1439 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1440 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1441 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1442 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1443 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1444 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1445 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1446 1447 /* check */ 1448 if (pcbddc->dbg_flag) { 1449 Vec x,x_change; 1450 PetscReal error; 1451 1452 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1453 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1454 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1455 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1456 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1458 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1459 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1460 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1461 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1462 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1464 ierr = VecDestroy(&x);CHKERRQ(ierr); 1465 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1466 } 1467 1468 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1469 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1470 if (isseqaij) { 1471 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1472 } else { 1473 Mat work_mat; 1474 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1475 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1476 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1477 } 1478 if (matis->A->symmetric_set) { 1479 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1480 #if !defined(PETSC_USE_COMPLEX) 1481 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1482 #endif 1483 } 1484 /* 1485 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1486 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1487 */ 1488 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1489 PetscFunctionReturn(0); 1490 } 1491 1492 #undef __FUNCT__ 1493 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1494 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1495 { 1496 PC_IS* pcis = (PC_IS*)(pc->data); 1497 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1498 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1499 PetscInt *idx_R_local=NULL; 1500 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1501 PetscInt vbs,bs; 1502 PetscBT bitmask=NULL; 1503 PetscErrorCode ierr; 1504 1505 PetscFunctionBegin; 1506 /* 1507 No need to setup local scatters if 1508 - primal space is unchanged 1509 AND 1510 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1511 AND 1512 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1513 */ 1514 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1515 PetscFunctionReturn(0); 1516 } 1517 /* destroy old objects */ 1518 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1519 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1520 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1521 /* Set Non-overlapping dimensions */ 1522 n_B = pcis->n_B; 1523 n_D = pcis->n - n_B; 1524 n_vertices = pcbddc->n_vertices; 1525 1526 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1527 1528 /* create auxiliary bitmask and allocate workspace */ 1529 if (!sub_schurs->reuse_mumps) { 1530 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1531 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1532 for (i=0;i<n_vertices;i++) { 1533 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1534 } 1535 1536 for (i=0, n_R=0; i<pcis->n; i++) { 1537 if (!PetscBTLookup(bitmask,i)) { 1538 idx_R_local[n_R++] = i; 1539 } 1540 } 1541 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1542 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1543 1544 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1545 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1546 } 1547 1548 /* Block code */ 1549 vbs = 1; 1550 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1551 if (bs>1 && !(n_vertices%bs)) { 1552 PetscBool is_blocked = PETSC_TRUE; 1553 PetscInt *vary; 1554 if (!sub_schurs->reuse_mumps) { 1555 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1556 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1557 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1558 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1559 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1560 for (i=0; i<pcis->n/bs; i++) { 1561 if (vary[i]!=0 && vary[i]!=bs) { 1562 is_blocked = PETSC_FALSE; 1563 break; 1564 } 1565 } 1566 ierr = PetscFree(vary);CHKERRQ(ierr); 1567 } else { 1568 /* Verify directly the R set */ 1569 for (i=0; i<n_R/bs; i++) { 1570 PetscInt j,node=idx_R_local[bs*i]; 1571 for (j=1; j<bs; j++) { 1572 if (node != idx_R_local[bs*i+j]-j) { 1573 is_blocked = PETSC_FALSE; 1574 break; 1575 } 1576 } 1577 } 1578 } 1579 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1580 vbs = bs; 1581 for (i=0;i<n_R/vbs;i++) { 1582 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1583 } 1584 } 1585 } 1586 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1587 if (sub_schurs->reuse_mumps) { 1588 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1589 1590 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1591 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1592 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1593 reuse_mumps->is_R = pcbddc->is_R_local; 1594 } else { 1595 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1596 } 1597 1598 /* print some info if requested */ 1599 if (pcbddc->dbg_flag) { 1600 PetscInt benign = 0; 1601 1602 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1603 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1604 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1605 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1606 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1609 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1610 } 1611 1612 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1613 if (!sub_schurs->reuse_mumps) { 1614 IS is_aux1,is_aux2; 1615 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1616 1617 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1618 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1619 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1620 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1621 for (i=0; i<n_D; i++) { 1622 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1623 } 1624 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1625 for (i=0, j=0; i<n_R; i++) { 1626 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1627 aux_array1[j++] = i; 1628 } 1629 } 1630 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1631 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1632 for (i=0, j=0; i<n_B; i++) { 1633 if (!PetscBTLookup(bitmask,is_indices[i])) { 1634 aux_array2[j++] = i; 1635 } 1636 } 1637 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1638 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1639 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1640 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1641 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1642 1643 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1644 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1645 for (i=0, j=0; i<n_R; i++) { 1646 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1647 aux_array1[j++] = i; 1648 } 1649 } 1650 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1651 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1652 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1653 } 1654 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1655 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1656 } else { 1657 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1658 IS tis; 1659 PetscInt schur_size; 1660 1661 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1662 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1663 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1664 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1665 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1666 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1667 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1668 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1669 } 1670 } 1671 PetscFunctionReturn(0); 1672 } 1673 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1677 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1678 { 1679 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1680 PC_IS *pcis = (PC_IS*)pc->data; 1681 PC pc_temp; 1682 Mat A_RR; 1683 MatReuse reuse; 1684 PetscScalar m_one = -1.0; 1685 PetscReal value; 1686 PetscInt n_D,n_R; 1687 PetscBool use_exact,use_exact_reduced,issbaij; 1688 PetscErrorCode ierr; 1689 /* prefixes stuff */ 1690 char dir_prefix[256],neu_prefix[256],str_level[16]; 1691 size_t len; 1692 1693 PetscFunctionBegin; 1694 1695 /* compute prefixes */ 1696 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1697 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1698 if (!pcbddc->current_level) { 1699 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1700 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1701 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1702 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1703 } else { 1704 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1705 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1706 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1707 len -= 15; /* remove "pc_bddc_coarse_" */ 1708 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1709 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1710 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1711 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1712 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1713 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1714 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1715 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1716 } 1717 1718 /* DIRICHLET PROBLEM */ 1719 if (dirichlet) { 1720 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1721 if (pcbddc->local_mat->symmetric_set) { 1722 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1723 } 1724 /* Matrix for Dirichlet problem is pcis->A_II */ 1725 n_D = pcis->n - pcis->n_B; 1726 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1727 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1728 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1729 /* default */ 1730 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1731 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1732 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1733 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1734 if (issbaij) { 1735 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1736 } else { 1737 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1738 } 1739 /* Allow user's customization */ 1740 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1741 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1742 } 1743 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1744 if (sub_schurs->reuse_mumps) { 1745 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1746 1747 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1748 } 1749 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1750 if (!n_D) { 1751 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1752 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1753 } 1754 /* Set Up KSP for Dirichlet problem of BDDC */ 1755 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1756 /* set ksp_D into pcis data */ 1757 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1758 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1759 pcis->ksp_D = pcbddc->ksp_D; 1760 } 1761 1762 /* NEUMANN PROBLEM */ 1763 A_RR = 0; 1764 if (neumann) { 1765 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1766 PetscInt ibs,mbs; 1767 PetscBool issbaij; 1768 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1769 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1770 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1771 if (pcbddc->ksp_R) { /* already created ksp */ 1772 PetscInt nn_R; 1773 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1774 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1775 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1776 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1777 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1778 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1779 reuse = MAT_INITIAL_MATRIX; 1780 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1781 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1782 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1783 reuse = MAT_INITIAL_MATRIX; 1784 } else { /* safe to reuse the matrix */ 1785 reuse = MAT_REUSE_MATRIX; 1786 } 1787 } 1788 /* last check */ 1789 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1790 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1791 reuse = MAT_INITIAL_MATRIX; 1792 } 1793 } else { /* first time, so we need to create the matrix */ 1794 reuse = MAT_INITIAL_MATRIX; 1795 } 1796 /* extract A_RR */ 1797 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1798 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1799 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1800 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1801 if (matis->A == pcbddc->local_mat) { 1802 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1803 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1804 } else { 1805 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1806 } 1807 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1808 if (matis->A == pcbddc->local_mat) { 1809 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1810 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1811 } else { 1812 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1813 } 1814 } 1815 if (!sub_schurs->reuse_mumps) { 1816 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1817 if (pcbddc->local_mat->symmetric_set) { 1818 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1819 } 1820 } else { 1821 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1822 1823 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1824 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1825 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1826 } 1827 if (!pcbddc->ksp_R) { /* create object if not present */ 1828 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1829 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1830 /* default */ 1831 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1832 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1833 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1834 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1835 if (issbaij) { 1836 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1837 } else { 1838 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1839 } 1840 /* Allow user's customization */ 1841 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1842 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1843 } 1844 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1845 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1846 if (!n_R) { 1847 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1848 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1849 } 1850 /* Reuse MUMPS solver if it is present */ 1851 if (sub_schurs->reuse_mumps) { 1852 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1853 1854 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1855 } 1856 /* Set Up KSP for Neumann problem of BDDC */ 1857 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1858 } 1859 /* free Neumann problem's matrix */ 1860 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1861 1862 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1863 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1864 if (pcbddc->dbg_flag) { 1865 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1866 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1867 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1868 } 1869 if (dirichlet) { /* Dirichlet */ 1870 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1871 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1872 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1873 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1874 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1875 /* need to be adapted? */ 1876 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1877 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1878 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1879 /* print info */ 1880 if (pcbddc->dbg_flag) { 1881 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1883 } 1884 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1885 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1886 } 1887 } 1888 if (neumann) { /* Neumann */ 1889 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1890 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1891 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1892 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1893 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1894 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1895 /* need to be adapted? */ 1896 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1897 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1898 /* print info */ 1899 if (pcbddc->dbg_flag) { 1900 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1901 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1902 } 1903 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1904 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1905 } 1906 } 1907 } 1908 PetscFunctionReturn(0); 1909 } 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1913 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1914 { 1915 PetscErrorCode ierr; 1916 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1917 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1918 1919 PetscFunctionBegin; 1920 if (!sub_schurs->reuse_mumps) { 1921 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1922 } 1923 if (!pcbddc->switch_static) { 1924 if (applytranspose && pcbddc->local_auxmat1) { 1925 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1926 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1927 } 1928 if (!sub_schurs->reuse_mumps) { 1929 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1930 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1931 } else { 1932 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1933 1934 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1935 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1936 } 1937 } else { 1938 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1939 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1940 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1941 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1942 if (applytranspose && pcbddc->local_auxmat1) { 1943 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1944 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1945 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1946 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1947 } 1948 } 1949 if (!sub_schurs->reuse_mumps) { 1950 if (applytranspose) { 1951 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1952 } else { 1953 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1954 } 1955 #if defined(PETSC_HAVE_MUMPS) 1956 } else { 1957 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1958 1959 if (applytranspose) { 1960 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1961 } else { 1962 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1963 } 1964 #endif 1965 } 1966 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1967 if (!pcbddc->switch_static) { 1968 if (!sub_schurs->reuse_mumps) { 1969 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1970 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1971 } else { 1972 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1973 1974 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1975 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1976 } 1977 if (!applytranspose && pcbddc->local_auxmat1) { 1978 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1979 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1980 } 1981 } else { 1982 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1983 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1984 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1985 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1986 if (!applytranspose && pcbddc->local_auxmat1) { 1987 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1988 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1989 } 1990 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1991 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1992 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1993 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2001 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2002 { 2003 PetscErrorCode ierr; 2004 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2005 PC_IS* pcis = (PC_IS*) (pc->data); 2006 const PetscScalar zero = 0.0; 2007 2008 PetscFunctionBegin; 2009 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2010 if (applytranspose) { 2011 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2012 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2013 } else { 2014 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2015 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2016 } 2017 2018 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2019 if (pcbddc->benign_p0_lidx >= 0) { 2020 PetscScalar *array; 2021 2022 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2023 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2024 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2025 } 2026 2027 /* start communications from local primal nodes to rhs of coarse solver */ 2028 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2029 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2030 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2031 2032 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2033 /* TODO remove null space when doing multilevel */ 2034 if (pcbddc->coarse_ksp) { 2035 Vec rhs,sol; 2036 2037 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2038 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2039 if (applytranspose) { 2040 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2041 } else { 2042 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2043 } 2044 } 2045 2046 /* Local solution on R nodes */ 2047 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2048 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2049 } 2050 2051 /* communications from coarse sol to local primal nodes */ 2052 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2053 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2054 2055 /* Sum contributions from two levels */ 2056 if (applytranspose) { 2057 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2058 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2059 } else { 2060 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2061 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2062 } 2063 /* store p0 */ 2064 if (pcbddc->benign_p0_lidx >= 0) { 2065 PetscScalar *array; 2066 2067 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2068 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2069 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2076 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2077 { 2078 PetscErrorCode ierr; 2079 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2080 PetscScalar *array; 2081 Vec from,to; 2082 2083 PetscFunctionBegin; 2084 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2085 from = pcbddc->coarse_vec; 2086 to = pcbddc->vec1_P; 2087 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2088 Vec tvec; 2089 2090 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2091 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2092 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2093 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2094 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2095 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2096 } 2097 } else { /* from local to global -> put data in coarse right hand side */ 2098 from = pcbddc->vec1_P; 2099 to = pcbddc->coarse_vec; 2100 } 2101 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2107 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2108 { 2109 PetscErrorCode ierr; 2110 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2111 PetscScalar *array; 2112 Vec from,to; 2113 2114 PetscFunctionBegin; 2115 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2116 from = pcbddc->coarse_vec; 2117 to = pcbddc->vec1_P; 2118 } else { /* from local to global -> put data in coarse right hand side */ 2119 from = pcbddc->vec1_P; 2120 to = pcbddc->coarse_vec; 2121 } 2122 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2123 if (smode == SCATTER_FORWARD) { 2124 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2125 Vec tvec; 2126 2127 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2128 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2129 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2130 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2131 } 2132 } else { 2133 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2134 ierr = VecResetArray(from);CHKERRQ(ierr); 2135 } 2136 } 2137 PetscFunctionReturn(0); 2138 } 2139 2140 /* uncomment for testing purposes */ 2141 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2144 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2145 { 2146 PetscErrorCode ierr; 2147 PC_IS* pcis = (PC_IS*)(pc->data); 2148 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2149 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2150 /* one and zero */ 2151 PetscScalar one=1.0,zero=0.0; 2152 /* space to store constraints and their local indices */ 2153 PetscScalar *constraints_data; 2154 PetscInt *constraints_idxs,*constraints_idxs_B; 2155 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2156 PetscInt *constraints_n; 2157 /* iterators */ 2158 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2159 /* BLAS integers */ 2160 PetscBLASInt lwork,lierr; 2161 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2162 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2163 /* reuse */ 2164 PetscInt olocal_primal_size,olocal_primal_size_cc; 2165 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2166 /* change of basis */ 2167 PetscBool qr_needed; 2168 PetscBT change_basis,qr_needed_idx; 2169 /* auxiliary stuff */ 2170 PetscInt *nnz,*is_indices; 2171 PetscInt ncc; 2172 /* some quantities */ 2173 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2174 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2175 2176 PetscFunctionBegin; 2177 /* Destroy Mat objects computed previously */ 2178 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2179 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2180 /* save info on constraints from previous setup (if any) */ 2181 olocal_primal_size = pcbddc->local_primal_size; 2182 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2183 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2184 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2185 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2186 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2187 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2188 2189 /* print some info */ 2190 if (pcbddc->dbg_flag) { 2191 IS vertices; 2192 PetscInt nv,nedges,nfaces; 2193 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2194 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2195 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2196 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2197 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2198 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2199 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2200 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2201 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2202 } 2203 2204 if (!pcbddc->adaptive_selection) { 2205 IS ISForVertices,*ISForFaces,*ISForEdges; 2206 MatNullSpace nearnullsp; 2207 const Vec *nearnullvecs; 2208 Vec *localnearnullsp; 2209 PetscScalar *array; 2210 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2211 PetscBool nnsp_has_cnst; 2212 /* LAPACK working arrays for SVD or POD */ 2213 PetscBool skip_lapack,boolforchange; 2214 PetscScalar *work; 2215 PetscReal *singular_vals; 2216 #if defined(PETSC_USE_COMPLEX) 2217 PetscReal *rwork; 2218 #endif 2219 #if defined(PETSC_MISSING_LAPACK_GESVD) 2220 PetscScalar *temp_basis,*correlation_mat; 2221 #else 2222 PetscBLASInt dummy_int=1; 2223 PetscScalar dummy_scalar=1.; 2224 #endif 2225 2226 /* Get index sets for faces, edges and vertices from graph */ 2227 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2228 /* free unneeded index sets */ 2229 if (!pcbddc->use_vertices) { 2230 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2231 } 2232 if (!pcbddc->use_edges) { 2233 for (i=0;i<n_ISForEdges;i++) { 2234 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2235 } 2236 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2237 n_ISForEdges = 0; 2238 } 2239 if (!pcbddc->use_faces) { 2240 for (i=0;i<n_ISForFaces;i++) { 2241 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2242 } 2243 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2244 n_ISForFaces = 0; 2245 } 2246 2247 #if defined(PETSC_USE_DEBUG) 2248 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2249 Also use_change_of_basis should be consistent among processors */ 2250 if (pcbddc->NullSpace) { 2251 PetscBool tbool[2],gbool[2]; 2252 2253 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2254 pcbddc->use_change_of_basis = PETSC_TRUE; 2255 if (!ISForEdges) { 2256 pcbddc->use_change_on_faces = PETSC_TRUE; 2257 } 2258 } 2259 tbool[0] = pcbddc->use_change_of_basis; 2260 tbool[1] = pcbddc->use_change_on_faces; 2261 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2262 pcbddc->use_change_of_basis = gbool[0]; 2263 pcbddc->use_change_on_faces = gbool[1]; 2264 } 2265 #endif 2266 2267 /* check if near null space is attached to global mat */ 2268 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2269 if (nearnullsp) { 2270 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2271 /* remove any stored info */ 2272 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2273 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2274 /* store information for BDDC solver reuse */ 2275 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2276 pcbddc->onearnullspace = nearnullsp; 2277 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2278 for (i=0;i<nnsp_size;i++) { 2279 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2280 } 2281 } else { /* if near null space is not provided BDDC uses constants by default */ 2282 nnsp_size = 0; 2283 nnsp_has_cnst = PETSC_TRUE; 2284 } 2285 /* get max number of constraints on a single cc */ 2286 max_constraints = nnsp_size; 2287 if (nnsp_has_cnst) max_constraints++; 2288 2289 /* 2290 Evaluate maximum storage size needed by the procedure 2291 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2292 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2293 There can be multiple constraints per connected component 2294 */ 2295 n_vertices = 0; 2296 if (ISForVertices) { 2297 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2298 } 2299 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2300 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2301 2302 total_counts = n_ISForFaces+n_ISForEdges; 2303 total_counts *= max_constraints; 2304 total_counts += n_vertices; 2305 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2306 2307 total_counts = 0; 2308 max_size_of_constraint = 0; 2309 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2310 IS used_is; 2311 if (i<n_ISForEdges) { 2312 used_is = ISForEdges[i]; 2313 } else { 2314 used_is = ISForFaces[i-n_ISForEdges]; 2315 } 2316 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2317 total_counts += j; 2318 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2319 } 2320 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2321 2322 /* get local part of global near null space vectors */ 2323 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2324 for (k=0;k<nnsp_size;k++) { 2325 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2326 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2327 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2328 } 2329 2330 /* whether or not to skip lapack calls */ 2331 skip_lapack = PETSC_TRUE; 2332 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2333 2334 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2335 if (!skip_lapack) { 2336 PetscScalar temp_work; 2337 2338 #if defined(PETSC_MISSING_LAPACK_GESVD) 2339 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2340 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2341 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2342 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2343 #if defined(PETSC_USE_COMPLEX) 2344 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2345 #endif 2346 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2347 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2348 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2349 lwork = -1; 2350 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2351 #if !defined(PETSC_USE_COMPLEX) 2352 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2353 #else 2354 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2355 #endif 2356 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2357 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2358 #else /* on missing GESVD */ 2359 /* SVD */ 2360 PetscInt max_n,min_n; 2361 max_n = max_size_of_constraint; 2362 min_n = max_constraints; 2363 if (max_size_of_constraint < max_constraints) { 2364 min_n = max_size_of_constraint; 2365 max_n = max_constraints; 2366 } 2367 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2368 #if defined(PETSC_USE_COMPLEX) 2369 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2370 #endif 2371 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2372 lwork = -1; 2373 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2374 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2375 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2376 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2377 #if !defined(PETSC_USE_COMPLEX) 2378 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2379 #else 2380 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2381 #endif 2382 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2383 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2384 #endif /* on missing GESVD */ 2385 /* Allocate optimal workspace */ 2386 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2387 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2388 } 2389 /* Now we can loop on constraining sets */ 2390 total_counts = 0; 2391 constraints_idxs_ptr[0] = 0; 2392 constraints_data_ptr[0] = 0; 2393 /* vertices */ 2394 if (n_vertices) { 2395 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2396 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2397 for (i=0;i<n_vertices;i++) { 2398 constraints_n[total_counts] = 1; 2399 constraints_data[total_counts] = 1.0; 2400 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2401 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2402 total_counts++; 2403 } 2404 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2405 n_vertices = total_counts; 2406 } 2407 2408 /* edges and faces */ 2409 total_counts_cc = total_counts; 2410 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2411 IS used_is; 2412 PetscBool idxs_copied = PETSC_FALSE; 2413 2414 if (ncc<n_ISForEdges) { 2415 used_is = ISForEdges[ncc]; 2416 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2417 } else { 2418 used_is = ISForFaces[ncc-n_ISForEdges]; 2419 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2420 } 2421 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2422 2423 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2424 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2425 /* change of basis should not be performed on local periodic nodes */ 2426 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2427 if (nnsp_has_cnst) { 2428 PetscScalar quad_value; 2429 2430 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2431 idxs_copied = PETSC_TRUE; 2432 2433 if (!pcbddc->use_nnsp_true) { 2434 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2435 } else { 2436 quad_value = 1.0; 2437 } 2438 for (j=0;j<size_of_constraint;j++) { 2439 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2440 } 2441 temp_constraints++; 2442 total_counts++; 2443 } 2444 for (k=0;k<nnsp_size;k++) { 2445 PetscReal real_value; 2446 PetscScalar *ptr_to_data; 2447 2448 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2449 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2450 for (j=0;j<size_of_constraint;j++) { 2451 ptr_to_data[j] = array[is_indices[j]]; 2452 } 2453 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2454 /* check if array is null on the connected component */ 2455 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2456 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2457 if (real_value > 0.0) { /* keep indices and values */ 2458 temp_constraints++; 2459 total_counts++; 2460 if (!idxs_copied) { 2461 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2462 idxs_copied = PETSC_TRUE; 2463 } 2464 } 2465 } 2466 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2467 valid_constraints = temp_constraints; 2468 if (!pcbddc->use_nnsp_true && temp_constraints) { 2469 if (temp_constraints == 1) { /* just normalize the constraint */ 2470 PetscScalar norm,*ptr_to_data; 2471 2472 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2474 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2475 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2476 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2477 } else { /* perform SVD */ 2478 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2479 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2480 2481 #if defined(PETSC_MISSING_LAPACK_GESVD) 2482 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2483 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2484 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2485 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2486 from that computed using LAPACKgesvd 2487 -> This is due to a different computation of eigenvectors in LAPACKheev 2488 -> The quality of the POD-computed basis will be the same */ 2489 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2490 /* Store upper triangular part of correlation matrix */ 2491 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2492 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2493 for (j=0;j<temp_constraints;j++) { 2494 for (k=0;k<j+1;k++) { 2495 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2496 } 2497 } 2498 /* compute eigenvalues and eigenvectors of correlation matrix */ 2499 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2500 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2501 #if !defined(PETSC_USE_COMPLEX) 2502 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2503 #else 2504 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2505 #endif 2506 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2507 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2508 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2509 j = 0; 2510 while (j < temp_constraints && singular_vals[j] < tol) j++; 2511 total_counts = total_counts-j; 2512 valid_constraints = temp_constraints-j; 2513 /* scale and copy POD basis into used quadrature memory */ 2514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2515 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2516 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2517 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2518 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2519 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2520 if (j<temp_constraints) { 2521 PetscInt ii; 2522 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2523 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2524 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2525 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2526 for (k=0;k<temp_constraints-j;k++) { 2527 for (ii=0;ii<size_of_constraint;ii++) { 2528 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2529 } 2530 } 2531 } 2532 #else /* on missing GESVD */ 2533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2534 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2535 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2536 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2537 #if !defined(PETSC_USE_COMPLEX) 2538 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2539 #else 2540 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2541 #endif 2542 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2543 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2544 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2545 k = temp_constraints; 2546 if (k > size_of_constraint) k = size_of_constraint; 2547 j = 0; 2548 while (j < k && singular_vals[k-j-1] < tol) j++; 2549 valid_constraints = k-j; 2550 total_counts = total_counts-temp_constraints+valid_constraints; 2551 #endif /* on missing GESVD */ 2552 } 2553 } 2554 /* update pointers information */ 2555 if (valid_constraints) { 2556 constraints_n[total_counts_cc] = valid_constraints; 2557 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2558 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2559 /* set change_of_basis flag */ 2560 if (boolforchange) { 2561 PetscBTSet(change_basis,total_counts_cc); 2562 } 2563 total_counts_cc++; 2564 } 2565 } 2566 /* free workspace */ 2567 if (!skip_lapack) { 2568 ierr = PetscFree(work);CHKERRQ(ierr); 2569 #if defined(PETSC_USE_COMPLEX) 2570 ierr = PetscFree(rwork);CHKERRQ(ierr); 2571 #endif 2572 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2573 #if defined(PETSC_MISSING_LAPACK_GESVD) 2574 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2575 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2576 #endif 2577 } 2578 for (k=0;k<nnsp_size;k++) { 2579 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2580 } 2581 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2582 /* free index sets of faces, edges and vertices */ 2583 for (i=0;i<n_ISForFaces;i++) { 2584 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2585 } 2586 if (n_ISForFaces) { 2587 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2588 } 2589 for (i=0;i<n_ISForEdges;i++) { 2590 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2591 } 2592 if (n_ISForEdges) { 2593 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2594 } 2595 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2596 } else { 2597 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2598 2599 total_counts = 0; 2600 n_vertices = 0; 2601 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2602 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2603 } 2604 max_constraints = 0; 2605 total_counts_cc = 0; 2606 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2607 total_counts += pcbddc->adaptive_constraints_n[i]; 2608 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2609 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2610 } 2611 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2612 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2613 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2614 constraints_data = pcbddc->adaptive_constraints_data; 2615 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2616 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2617 total_counts_cc = 0; 2618 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2619 if (pcbddc->adaptive_constraints_n[i]) { 2620 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2621 } 2622 } 2623 #if 0 2624 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2625 for (i=0;i<total_counts_cc;i++) { 2626 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2627 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2628 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2629 printf(" %d",constraints_idxs[j]); 2630 } 2631 printf("\n"); 2632 printf("number of cc: %d\n",constraints_n[i]); 2633 } 2634 for (i=0;i<n_vertices;i++) { 2635 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2636 } 2637 for (i=0;i<sub_schurs->n_subs;i++) { 2638 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2639 } 2640 #endif 2641 2642 max_size_of_constraint = 0; 2643 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2644 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2645 /* Change of basis */ 2646 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2647 if (pcbddc->use_change_of_basis) { 2648 for (i=0;i<sub_schurs->n_subs;i++) { 2649 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2650 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2651 } 2652 } 2653 } 2654 } 2655 pcbddc->local_primal_size = total_counts; 2656 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2657 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2658 2659 /* map constraints_idxs in boundary numbering */ 2660 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2661 if (i != constraints_idxs_ptr[total_counts_cc]) { 2662 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2663 } 2664 2665 /* Create constraint matrix */ 2666 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2667 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2668 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2669 2670 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2671 /* determine if a QR strategy is needed for change of basis */ 2672 qr_needed = PETSC_FALSE; 2673 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2674 total_primal_vertices=0; 2675 pcbddc->local_primal_size_cc = 0; 2676 for (i=0;i<total_counts_cc;i++) { 2677 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2678 if (size_of_constraint == 1) { 2679 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2680 pcbddc->local_primal_size_cc += 1; 2681 } else if (PetscBTLookup(change_basis,i)) { 2682 for (k=0;k<constraints_n[i];k++) { 2683 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2684 } 2685 pcbddc->local_primal_size_cc += constraints_n[i]; 2686 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2687 PetscBTSet(qr_needed_idx,i); 2688 qr_needed = PETSC_TRUE; 2689 } 2690 } else { 2691 pcbddc->local_primal_size_cc += 1; 2692 } 2693 } 2694 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2695 pcbddc->n_vertices = total_primal_vertices; 2696 /* permute indices in order to have a sorted set of vertices */ 2697 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2698 2699 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2700 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2701 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2702 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2703 2704 /* nonzero structure of constraint matrix */ 2705 /* and get reference dof for local constraints */ 2706 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2707 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2708 2709 j = total_primal_vertices; 2710 total_counts = total_primal_vertices; 2711 cum = total_primal_vertices; 2712 for (i=n_vertices;i<total_counts_cc;i++) { 2713 if (!PetscBTLookup(change_basis,i)) { 2714 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2715 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2716 cum++; 2717 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2718 for (k=0;k<constraints_n[i];k++) { 2719 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2720 nnz[j+k] = size_of_constraint; 2721 } 2722 j += constraints_n[i]; 2723 } 2724 } 2725 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2726 ierr = PetscFree(nnz);CHKERRQ(ierr); 2727 2728 /* set values in constraint matrix */ 2729 for (i=0;i<total_primal_vertices;i++) { 2730 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2731 } 2732 total_counts = total_primal_vertices; 2733 for (i=n_vertices;i<total_counts_cc;i++) { 2734 if (!PetscBTLookup(change_basis,i)) { 2735 PetscInt *cols; 2736 2737 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2738 cols = constraints_idxs+constraints_idxs_ptr[i]; 2739 for (k=0;k<constraints_n[i];k++) { 2740 PetscInt row = total_counts+k; 2741 PetscScalar *vals; 2742 2743 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2744 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2745 } 2746 total_counts += constraints_n[i]; 2747 } 2748 } 2749 /* assembling */ 2750 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2751 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2752 2753 /* 2754 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2755 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2756 */ 2757 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2758 if (pcbddc->use_change_of_basis) { 2759 /* dual and primal dofs on a single cc */ 2760 PetscInt dual_dofs,primal_dofs; 2761 /* working stuff for GEQRF */ 2762 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2763 PetscBLASInt lqr_work; 2764 /* working stuff for UNGQR */ 2765 PetscScalar *gqr_work,lgqr_work_t; 2766 PetscBLASInt lgqr_work; 2767 /* working stuff for TRTRS */ 2768 PetscScalar *trs_rhs; 2769 PetscBLASInt Blas_NRHS; 2770 /* pointers for values insertion into change of basis matrix */ 2771 PetscInt *start_rows,*start_cols; 2772 PetscScalar *start_vals; 2773 /* working stuff for values insertion */ 2774 PetscBT is_primal; 2775 PetscInt *aux_primal_numbering_B; 2776 /* matrix sizes */ 2777 PetscInt global_size,local_size; 2778 /* temporary change of basis */ 2779 Mat localChangeOfBasisMatrix; 2780 /* extra space for debugging */ 2781 PetscScalar *dbg_work; 2782 2783 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2784 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2785 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2786 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2787 /* nonzeros for local mat */ 2788 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2789 for (i=0;i<pcis->n;i++) nnz[i]=1; 2790 for (i=n_vertices;i<total_counts_cc;i++) { 2791 if (PetscBTLookup(change_basis,i)) { 2792 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2793 if (PetscBTLookup(qr_needed_idx,i)) { 2794 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2795 } else { 2796 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2797 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2798 } 2799 } 2800 } 2801 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2802 ierr = PetscFree(nnz);CHKERRQ(ierr); 2803 /* Set initial identity in the matrix */ 2804 for (i=0;i<pcis->n;i++) { 2805 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2806 } 2807 2808 if (pcbddc->dbg_flag) { 2809 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2810 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2811 } 2812 2813 2814 /* Now we loop on the constraints which need a change of basis */ 2815 /* 2816 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2817 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2818 2819 Basic blocks of change of basis matrix T computed by 2820 2821 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2822 2823 | 1 0 ... 0 s_1/S | 2824 | 0 1 ... 0 s_2/S | 2825 | ... | 2826 | 0 ... 1 s_{n-1}/S | 2827 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2828 2829 with S = \sum_{i=1}^n s_i^2 2830 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2831 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2832 2833 - QR decomposition of constraints otherwise 2834 */ 2835 if (qr_needed) { 2836 /* space to store Q */ 2837 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2838 /* first we issue queries for optimal work */ 2839 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2840 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2841 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2842 lqr_work = -1; 2843 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2844 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2845 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2846 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2847 lgqr_work = -1; 2848 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2849 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2850 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2851 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2852 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2853 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2854 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2855 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2856 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2857 /* array to store scaling factors for reflectors */ 2858 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2859 /* array to store rhs and solution of triangular solver */ 2860 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2861 /* allocating workspace for check */ 2862 if (pcbddc->dbg_flag) { 2863 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2864 } 2865 } 2866 /* array to store whether a node is primal or not */ 2867 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2868 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2869 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2870 if (i != total_primal_vertices) { 2871 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2872 } 2873 for (i=0;i<total_primal_vertices;i++) { 2874 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2875 } 2876 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2877 2878 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2879 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2880 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2881 if (PetscBTLookup(change_basis,total_counts)) { 2882 /* get constraint info */ 2883 primal_dofs = constraints_n[total_counts]; 2884 dual_dofs = size_of_constraint-primal_dofs; 2885 2886 if (pcbddc->dbg_flag) { 2887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2888 } 2889 2890 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2891 2892 /* copy quadrature constraints for change of basis check */ 2893 if (pcbddc->dbg_flag) { 2894 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2895 } 2896 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2897 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2898 2899 /* compute QR decomposition of constraints */ 2900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2901 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2902 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2904 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2905 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2906 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2907 2908 /* explictly compute R^-T */ 2909 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2910 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2911 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2912 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2913 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2914 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2915 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2916 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2917 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2918 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2919 2920 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2921 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2922 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2923 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2924 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2925 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2926 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2927 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2928 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2929 2930 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2931 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2932 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2933 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2934 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2935 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2936 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2937 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2938 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2939 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2940 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2941 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2942 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2943 2944 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2945 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2946 /* insert cols for primal dofs */ 2947 for (j=0;j<primal_dofs;j++) { 2948 start_vals = &qr_basis[j*size_of_constraint]; 2949 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2950 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2951 } 2952 /* insert cols for dual dofs */ 2953 for (j=0,k=0;j<dual_dofs;k++) { 2954 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2955 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2956 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2957 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2958 j++; 2959 } 2960 } 2961 2962 /* check change of basis */ 2963 if (pcbddc->dbg_flag) { 2964 PetscInt ii,jj; 2965 PetscBool valid_qr=PETSC_TRUE; 2966 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2968 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2969 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2970 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2971 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2972 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2973 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2974 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2975 for (jj=0;jj<size_of_constraint;jj++) { 2976 for (ii=0;ii<primal_dofs;ii++) { 2977 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2978 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2979 } 2980 } 2981 if (!valid_qr) { 2982 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2983 for (jj=0;jj<size_of_constraint;jj++) { 2984 for (ii=0;ii<primal_dofs;ii++) { 2985 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2986 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2987 } 2988 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2989 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2990 } 2991 } 2992 } 2993 } else { 2994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2995 } 2996 } 2997 } else { /* simple transformation block */ 2998 PetscInt row,col; 2999 PetscScalar val,norm; 3000 3001 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3002 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3003 for (j=0;j<size_of_constraint;j++) { 3004 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3005 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3006 if (!PetscBTLookup(is_primal,row_B)) { 3007 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3008 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3009 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3010 } else { 3011 for (k=0;k<size_of_constraint;k++) { 3012 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3013 if (row != col) { 3014 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3015 } else { 3016 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3017 } 3018 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3019 } 3020 } 3021 } 3022 if (pcbddc->dbg_flag) { 3023 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3024 } 3025 } 3026 } else { 3027 if (pcbddc->dbg_flag) { 3028 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3029 } 3030 } 3031 } 3032 3033 /* free workspace */ 3034 if (qr_needed) { 3035 if (pcbddc->dbg_flag) { 3036 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3037 } 3038 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3039 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3040 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3041 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3042 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3043 } 3044 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3045 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3046 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3047 3048 /* assembling of global change of variable */ 3049 { 3050 Mat tmat; 3051 PetscInt bs; 3052 3053 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3054 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3055 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3056 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3057 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3058 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3059 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3060 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3061 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3062 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3063 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3064 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3065 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3066 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3067 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3068 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3069 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3070 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3071 } 3072 /* check */ 3073 if (pcbddc->dbg_flag) { 3074 PetscReal error; 3075 Vec x,x_change; 3076 3077 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3078 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3079 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3080 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3081 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3082 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3083 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3084 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3085 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3086 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3087 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3088 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3089 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3091 ierr = VecDestroy(&x);CHKERRQ(ierr); 3092 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3093 } 3094 3095 /* adapt sub_schurs computed (if any) */ 3096 if (pcbddc->use_deluxe_scaling) { 3097 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3098 if (sub_schurs->S_Ej_all) { 3099 Mat S_new,tmat; 3100 ISLocalToGlobalMapping NtoSall; 3101 IS is_all_N,is_V,is_V_Sall; 3102 const PetscScalar *array; 3103 const PetscInt *idxs_V,*idxs_all; 3104 PetscInt i,n_V; 3105 3106 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3107 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3108 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3109 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3110 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3111 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3112 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3113 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3114 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3115 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3116 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3117 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3118 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3119 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3120 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3121 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3122 for (i=0;i<n_V;i++) { 3123 PetscScalar val; 3124 PetscInt idx; 3125 3126 idx = idxs_V[i]; 3127 val = array[idxs_all[idxs_V[i]]]; 3128 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3129 } 3130 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3131 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3132 sub_schurs->S_Ej_all = S_new; 3133 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3134 if (sub_schurs->sum_S_Ej_all) { 3135 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3136 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3137 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3138 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3139 sub_schurs->sum_S_Ej_all = S_new; 3140 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3141 } 3142 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3143 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3144 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3145 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3146 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3147 } 3148 } 3149 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3150 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3151 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3152 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3153 } 3154 3155 /* set up change of basis context */ 3156 if (pcbddc->ChangeOfBasisMatrix) { 3157 PCBDDCChange_ctx change_ctx; 3158 3159 if (!pcbddc->new_global_mat) { 3160 PetscInt global_size,local_size; 3161 3162 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3163 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3164 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3165 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3166 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3167 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3168 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3169 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3170 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3171 } else { 3172 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3173 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3174 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3175 } 3176 if (!pcbddc->user_ChangeOfBasisMatrix) { 3177 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3178 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3179 } else { 3180 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3181 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3182 } 3183 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3184 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3185 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3186 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3187 } 3188 3189 /* add pressure dof to set of primal nodes for numbering purposes */ 3190 if (pcbddc->benign_p0_lidx >= 0) { 3191 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3192 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx; 3193 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3194 pcbddc->local_primal_size_cc++; 3195 pcbddc->local_primal_size++; 3196 } 3197 3198 /* check if a new primal space has been introduced (also take into account benign trick) */ 3199 pcbddc->new_primal_space_local = PETSC_TRUE; 3200 if (olocal_primal_size == pcbddc->local_primal_size) { 3201 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3202 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3203 if (!pcbddc->new_primal_space_local) { 3204 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3205 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3206 } 3207 } 3208 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3209 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3210 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3211 3212 /* flush dbg viewer */ 3213 if (pcbddc->dbg_flag) { 3214 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3215 } 3216 3217 /* free workspace */ 3218 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3219 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3220 if (!pcbddc->adaptive_selection) { 3221 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3222 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3223 } else { 3224 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3225 pcbddc->adaptive_constraints_idxs_ptr, 3226 pcbddc->adaptive_constraints_data_ptr, 3227 pcbddc->adaptive_constraints_idxs, 3228 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3229 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3230 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3231 } 3232 PetscFunctionReturn(0); 3233 } 3234 3235 #undef __FUNCT__ 3236 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3237 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3238 { 3239 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3240 PC_IS *pcis = (PC_IS*)pc->data; 3241 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3242 PetscInt ierr,i,vertex_size,N; 3243 PetscViewer viewer=pcbddc->dbg_viewer; 3244 3245 PetscFunctionBegin; 3246 /* Reset previously computed graph */ 3247 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3248 /* Init local Graph struct */ 3249 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3250 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3251 3252 /* Check validity of the csr graph passed in by the user */ 3253 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3254 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3255 } 3256 3257 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3258 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3259 PetscInt *xadj,*adjncy; 3260 PetscInt nvtxs; 3261 PetscBool flg_row=PETSC_FALSE; 3262 3263 if (pcbddc->use_local_adj) { 3264 3265 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3266 if (flg_row) { 3267 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3268 pcbddc->computed_rowadj = PETSC_TRUE; 3269 } 3270 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3271 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3272 IS is_dummy; 3273 ISLocalToGlobalMapping l2gmap_dummy; 3274 PetscInt j,sum; 3275 PetscInt *cxadj,*cadjncy; 3276 const PetscInt *idxs; 3277 PCBDDCGraph graph; 3278 PetscBT is_on_boundary; 3279 3280 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3281 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3282 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3283 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3284 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3285 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3286 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3287 if (flg_row) { 3288 graph->xadj = xadj; 3289 graph->adjncy = adjncy; 3290 } 3291 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3292 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3293 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3294 3295 if (pcbddc->dbg_flag) { 3296 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3297 for (i=0;i<graph->ncc;i++) { 3298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3299 } 3300 } 3301 3302 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3303 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3304 for (i=0;i<pcis->n_B;i++) { 3305 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3306 } 3307 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3308 3309 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3310 sum = 0; 3311 for (i=0;i<graph->ncc;i++) { 3312 PetscInt sizecc = 0; 3313 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3314 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3315 sizecc++; 3316 } 3317 } 3318 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3319 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3320 cxadj[graph->queue[j]] = sizecc; 3321 } 3322 } 3323 sum += sizecc*sizecc; 3324 } 3325 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3326 sum = 0; 3327 for (i=0;i<pcis->n;i++) { 3328 PetscInt temp = cxadj[i]; 3329 cxadj[i] = sum; 3330 sum += temp; 3331 } 3332 cxadj[pcis->n] = sum; 3333 for (i=0;i<graph->ncc;i++) { 3334 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3335 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3336 PetscInt k,sizecc = 0; 3337 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3338 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3339 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3340 sizecc++; 3341 } 3342 } 3343 } 3344 } 3345 } 3346 if (sum) { 3347 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3348 } else { 3349 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3350 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3351 } 3352 graph->xadj = 0; 3353 graph->adjncy = 0; 3354 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3355 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3356 } 3357 } 3358 if (pcbddc->dbg_flag) { 3359 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3360 } 3361 3362 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3363 vertex_size = 1; 3364 if (pcbddc->user_provided_isfordofs) { 3365 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3366 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3367 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3368 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3369 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3370 } 3371 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3372 pcbddc->n_ISForDofs = 0; 3373 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3374 } 3375 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3376 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3377 } else { 3378 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3379 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3380 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3381 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3382 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3383 } 3384 } 3385 } 3386 3387 /* Setup of Graph */ 3388 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3389 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3390 } 3391 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3392 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3393 } 3394 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3395 3396 /* Graph's connected components analysis */ 3397 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3398 3399 /* print some info to stdout */ 3400 if (pcbddc->dbg_flag) { 3401 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3402 } 3403 3404 /* mark topography has done */ 3405 pcbddc->recompute_topography = PETSC_FALSE; 3406 PetscFunctionReturn(0); 3407 } 3408 3409 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3410 #undef __FUNCT__ 3411 #define __FUNCT__ "PCBDDCSubsetNumbering" 3412 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3413 { 3414 PetscSF sf; 3415 PetscLayout map; 3416 const PetscInt *idxs; 3417 PetscInt *leaf_data,*root_data,*gidxs; 3418 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3419 PetscInt n_n,nlocals,start,first_index; 3420 PetscMPIInt commsize; 3421 PetscBool first_found; 3422 PetscErrorCode ierr; 3423 3424 PetscFunctionBegin; 3425 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3426 if (subset_mult) { 3427 PetscCheckSameComm(subset,1,subset_mult,2); 3428 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3429 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3430 } 3431 /* create workspace layout for computing global indices of subset */ 3432 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3433 lbounds[0] = lbounds[1] = 0; 3434 for (i=0;i<n;i++) { 3435 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3436 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3437 } 3438 lbounds[0] = -lbounds[0]; 3439 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3440 gbounds[0] = -gbounds[0]; 3441 N = gbounds[1] - gbounds[0] + 1; 3442 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3443 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3444 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3445 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3446 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3447 3448 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3449 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3450 if (subset_mult) { 3451 const PetscInt* idxs_mult; 3452 3453 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3454 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3455 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3456 } else { 3457 for (i=0;i<n;i++) leaf_data[i] = 1; 3458 } 3459 /* local size of new subset */ 3460 n_n = 0; 3461 for (i=0;i<n;i++) n_n += leaf_data[i]; 3462 3463 /* global indexes in layout */ 3464 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3465 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3466 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3467 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3468 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3469 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3470 3471 /* reduce from leaves to roots */ 3472 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3473 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3474 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3475 3476 /* count indexes in local part of layout */ 3477 nlocals = 0; 3478 first_index = -1; 3479 first_found = PETSC_FALSE; 3480 for (i=0;i<Nl;i++) { 3481 if (!first_found && root_data[i]) { 3482 first_found = PETSC_TRUE; 3483 first_index = i; 3484 } 3485 nlocals += root_data[i]; 3486 } 3487 3488 /* cumulative of number of indexes and size of subset without holes */ 3489 #if defined(PETSC_HAVE_MPI_EXSCAN) 3490 start = 0; 3491 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3492 #else 3493 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3494 start = start-nlocals; 3495 #endif 3496 3497 if (N_n) { /* compute total size of new subset if requested */ 3498 *N_n = start + nlocals; 3499 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3500 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3501 } 3502 3503 /* adapt root data with cumulative */ 3504 if (first_found) { 3505 PetscInt old_index; 3506 3507 root_data[first_index] += start; 3508 old_index = first_index; 3509 for (i=first_index+1;i<Nl;i++) { 3510 if (root_data[i]) { 3511 root_data[i] += root_data[old_index]; 3512 old_index = i; 3513 } 3514 } 3515 } 3516 3517 /* from roots to leaves */ 3518 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3519 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3520 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3521 3522 /* create new IS with global indexes without holes */ 3523 if (subset_mult) { 3524 const PetscInt* idxs_mult; 3525 PetscInt cum; 3526 3527 cum = 0; 3528 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3529 for (i=0;i<n;i++) { 3530 PetscInt j; 3531 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3532 } 3533 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3534 } else { 3535 for (i=0;i<n;i++) { 3536 gidxs[i] = leaf_data[i]-1; 3537 } 3538 } 3539 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3540 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3541 PetscFunctionReturn(0); 3542 } 3543 3544 #undef __FUNCT__ 3545 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3546 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3547 { 3548 PetscInt i,j; 3549 PetscScalar *alphas; 3550 PetscErrorCode ierr; 3551 3552 PetscFunctionBegin; 3553 /* this implements stabilized Gram-Schmidt */ 3554 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3555 for (i=0;i<n;i++) { 3556 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3557 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3558 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3559 } 3560 ierr = PetscFree(alphas);CHKERRQ(ierr); 3561 PetscFunctionReturn(0); 3562 } 3563 3564 #undef __FUNCT__ 3565 #define __FUNCT__ "MatISGetSubassemblingPattern" 3566 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3567 { 3568 IS ranks_send_to; 3569 PetscInt n_neighs,*neighs,*n_shared,**shared; 3570 PetscMPIInt size,rank,color; 3571 PetscInt *xadj,*adjncy; 3572 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3573 PetscInt i,local_size,threshold=0; 3574 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3575 PetscSubcomm subcomm; 3576 PetscErrorCode ierr; 3577 3578 PetscFunctionBegin; 3579 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3580 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3581 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3582 3583 /* Get info on mapping */ 3584 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3585 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3586 3587 /* build local CSR graph of subdomains' connectivity */ 3588 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3589 xadj[0] = 0; 3590 xadj[1] = PetscMax(n_neighs-1,0); 3591 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3592 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3593 3594 if (threshold) { 3595 PetscInt xadj_count = 0; 3596 for (i=1;i<n_neighs;i++) { 3597 if (n_shared[i] > threshold) { 3598 adjncy[xadj_count] = neighs[i]; 3599 adjncy_wgt[xadj_count] = n_shared[i]; 3600 xadj_count++; 3601 } 3602 } 3603 xadj[1] = xadj_count; 3604 } else { 3605 if (xadj[1]) { 3606 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3607 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3608 } 3609 } 3610 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3611 if (use_square) { 3612 for (i=0;i<xadj[1];i++) { 3613 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3614 } 3615 } 3616 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3617 3618 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3619 3620 /* 3621 Restrict work on active processes only. 3622 */ 3623 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3624 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3625 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3626 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3627 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3628 if (color) { 3629 ierr = PetscFree(xadj);CHKERRQ(ierr); 3630 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3631 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3632 } else { 3633 Mat subdomain_adj; 3634 IS new_ranks,new_ranks_contig; 3635 MatPartitioning partitioner; 3636 PetscInt prank,rstart=0,rend=0; 3637 PetscInt *is_indices,*oldranks; 3638 PetscBool aggregate; 3639 3640 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3641 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3642 prank = rank; 3643 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3644 /* 3645 for (i=0;i<size;i++) { 3646 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3647 } 3648 */ 3649 for (i=0;i<xadj[1];i++) { 3650 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3651 } 3652 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3653 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3654 if (aggregate) { 3655 PetscInt lrows,row,ncols,*cols; 3656 PetscMPIInt nrank; 3657 PetscScalar *vals; 3658 3659 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3660 lrows = 0; 3661 if (nrank<redprocs) { 3662 lrows = size/redprocs; 3663 if (nrank<size%redprocs) lrows++; 3664 } 3665 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3666 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3667 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3668 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3669 row = nrank; 3670 ncols = xadj[1]-xadj[0]; 3671 cols = adjncy; 3672 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3673 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3674 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3675 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3676 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3677 ierr = PetscFree(xadj);CHKERRQ(ierr); 3678 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3679 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3680 ierr = PetscFree(vals);CHKERRQ(ierr); 3681 } else { 3682 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3683 } 3684 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3685 3686 /* Partition */ 3687 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3688 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3689 if (use_vwgt) { 3690 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3691 v_wgt[0] = local_size; 3692 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3693 } 3694 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3695 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3696 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3697 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3698 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3699 3700 /* renumber new_ranks to avoid "holes" in new set of processors */ 3701 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3702 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3703 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3704 if (!redprocs) { 3705 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3706 } else { 3707 PetscInt idxs[1]; 3708 PetscMPIInt tag; 3709 MPI_Request *reqs; 3710 3711 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3712 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3713 for (i=rstart;i<rend;i++) { 3714 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3715 } 3716 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3717 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3718 ierr = PetscFree(reqs);CHKERRQ(ierr); 3719 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3720 } 3721 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3722 /* clean up */ 3723 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3724 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3725 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3726 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3727 } 3728 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3729 3730 /* assemble parallel IS for sends */ 3731 i = 1; 3732 if (color) i=0; 3733 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3734 /* get back IS */ 3735 *is_sends = ranks_send_to; 3736 PetscFunctionReturn(0); 3737 } 3738 3739 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3740 3741 #undef __FUNCT__ 3742 #define __FUNCT__ "MatISSubassemble" 3743 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3744 { 3745 Mat local_mat; 3746 IS is_sends_internal; 3747 PetscInt rows,cols,new_local_rows; 3748 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3749 PetscBool ismatis,isdense,newisdense,destroy_mat; 3750 ISLocalToGlobalMapping l2gmap; 3751 PetscInt* l2gmap_indices; 3752 const PetscInt* is_indices; 3753 MatType new_local_type; 3754 /* buffers */ 3755 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3756 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3757 PetscInt *recv_buffer_idxs_local; 3758 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3759 /* MPI */ 3760 MPI_Comm comm,comm_n; 3761 PetscSubcomm subcomm; 3762 PetscMPIInt n_sends,n_recvs,commsize; 3763 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3764 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3765 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3766 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3767 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3768 PetscErrorCode ierr; 3769 3770 PetscFunctionBegin; 3771 /* TODO: add missing checks */ 3772 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3773 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3774 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3775 PetscValidLogicalCollectiveInt(mat,nis,7); 3776 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3777 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3778 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3779 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3780 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3781 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3782 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3783 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3784 PetscInt mrows,mcols,mnrows,mncols; 3785 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3786 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3787 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3788 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3789 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3790 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3791 } 3792 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3793 PetscValidLogicalCollectiveInt(mat,bs,0); 3794 /* prepare IS for sending if not provided */ 3795 if (!is_sends) { 3796 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3797 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3798 } else { 3799 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3800 is_sends_internal = is_sends; 3801 } 3802 3803 /* get comm */ 3804 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3805 3806 /* compute number of sends */ 3807 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3808 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3809 3810 /* compute number of receives */ 3811 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3812 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3813 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3814 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3815 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3816 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3817 ierr = PetscFree(iflags);CHKERRQ(ierr); 3818 3819 /* restrict comm if requested */ 3820 subcomm = 0; 3821 destroy_mat = PETSC_FALSE; 3822 if (restrict_comm) { 3823 PetscMPIInt color,subcommsize; 3824 3825 color = 0; 3826 if (restrict_full) { 3827 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3828 } else { 3829 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3830 } 3831 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3832 subcommsize = commsize - subcommsize; 3833 /* check if reuse has been requested */ 3834 if (reuse == MAT_REUSE_MATRIX) { 3835 if (*mat_n) { 3836 PetscMPIInt subcommsize2; 3837 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3838 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3839 comm_n = PetscObjectComm((PetscObject)*mat_n); 3840 } else { 3841 comm_n = PETSC_COMM_SELF; 3842 } 3843 } else { /* MAT_INITIAL_MATRIX */ 3844 PetscMPIInt rank; 3845 3846 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3847 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3848 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3849 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3850 comm_n = PetscSubcommChild(subcomm); 3851 } 3852 /* flag to destroy *mat_n if not significative */ 3853 if (color) destroy_mat = PETSC_TRUE; 3854 } else { 3855 comm_n = comm; 3856 } 3857 3858 /* prepare send/receive buffers */ 3859 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3860 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3861 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3862 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3863 if (nis) { 3864 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3865 } 3866 3867 /* Get data from local matrices */ 3868 if (!isdense) { 3869 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3870 /* TODO: See below some guidelines on how to prepare the local buffers */ 3871 /* 3872 send_buffer_vals should contain the raw values of the local matrix 3873 send_buffer_idxs should contain: 3874 - MatType_PRIVATE type 3875 - PetscInt size_of_l2gmap 3876 - PetscInt global_row_indices[size_of_l2gmap] 3877 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3878 */ 3879 } else { 3880 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3881 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3882 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3883 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3884 send_buffer_idxs[1] = i; 3885 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3886 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3887 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3888 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3889 for (i=0;i<n_sends;i++) { 3890 ilengths_vals[is_indices[i]] = len*len; 3891 ilengths_idxs[is_indices[i]] = len+2; 3892 } 3893 } 3894 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3895 /* additional is (if any) */ 3896 if (nis) { 3897 PetscMPIInt psum; 3898 PetscInt j; 3899 for (j=0,psum=0;j<nis;j++) { 3900 PetscInt plen; 3901 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3902 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3903 psum += len+1; /* indices + lenght */ 3904 } 3905 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3906 for (j=0,psum=0;j<nis;j++) { 3907 PetscInt plen; 3908 const PetscInt *is_array_idxs; 3909 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3910 send_buffer_idxs_is[psum] = plen; 3911 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3912 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3913 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3914 psum += plen+1; /* indices + lenght */ 3915 } 3916 for (i=0;i<n_sends;i++) { 3917 ilengths_idxs_is[is_indices[i]] = psum; 3918 } 3919 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3920 } 3921 3922 buf_size_idxs = 0; 3923 buf_size_vals = 0; 3924 buf_size_idxs_is = 0; 3925 for (i=0;i<n_recvs;i++) { 3926 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3927 buf_size_vals += (PetscInt)olengths_vals[i]; 3928 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3929 } 3930 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3931 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3932 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3933 3934 /* get new tags for clean communications */ 3935 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3936 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3937 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3938 3939 /* allocate for requests */ 3940 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3941 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3942 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3943 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3944 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3945 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3946 3947 /* communications */ 3948 ptr_idxs = recv_buffer_idxs; 3949 ptr_vals = recv_buffer_vals; 3950 ptr_idxs_is = recv_buffer_idxs_is; 3951 for (i=0;i<n_recvs;i++) { 3952 source_dest = onodes[i]; 3953 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3954 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3955 ptr_idxs += olengths_idxs[i]; 3956 ptr_vals += olengths_vals[i]; 3957 if (nis) { 3958 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3959 ptr_idxs_is += olengths_idxs_is[i]; 3960 } 3961 } 3962 for (i=0;i<n_sends;i++) { 3963 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3964 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3965 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3966 if (nis) { 3967 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3968 } 3969 } 3970 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3971 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3972 3973 /* assemble new l2g map */ 3974 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3975 ptr_idxs = recv_buffer_idxs; 3976 new_local_rows = 0; 3977 for (i=0;i<n_recvs;i++) { 3978 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3979 ptr_idxs += olengths_idxs[i]; 3980 } 3981 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3982 ptr_idxs = recv_buffer_idxs; 3983 new_local_rows = 0; 3984 for (i=0;i<n_recvs;i++) { 3985 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3986 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3987 ptr_idxs += olengths_idxs[i]; 3988 } 3989 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3990 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3991 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3992 3993 /* infer new local matrix type from received local matrices type */ 3994 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3995 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3996 if (n_recvs) { 3997 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3998 ptr_idxs = recv_buffer_idxs; 3999 for (i=0;i<n_recvs;i++) { 4000 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4001 new_local_type_private = MATAIJ_PRIVATE; 4002 break; 4003 } 4004 ptr_idxs += olengths_idxs[i]; 4005 } 4006 switch (new_local_type_private) { 4007 case MATDENSE_PRIVATE: 4008 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4009 new_local_type = MATSEQAIJ; 4010 bs = 1; 4011 } else { /* if I receive only 1 dense matrix */ 4012 new_local_type = MATSEQDENSE; 4013 bs = 1; 4014 } 4015 break; 4016 case MATAIJ_PRIVATE: 4017 new_local_type = MATSEQAIJ; 4018 bs = 1; 4019 break; 4020 case MATBAIJ_PRIVATE: 4021 new_local_type = MATSEQBAIJ; 4022 break; 4023 case MATSBAIJ_PRIVATE: 4024 new_local_type = MATSEQSBAIJ; 4025 break; 4026 default: 4027 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4028 break; 4029 } 4030 } else { /* by default, new_local_type is seqdense */ 4031 new_local_type = MATSEQDENSE; 4032 bs = 1; 4033 } 4034 4035 /* create MATIS object if needed */ 4036 if (reuse == MAT_INITIAL_MATRIX) { 4037 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4038 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4039 } else { 4040 /* it also destroys the local matrices */ 4041 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4042 } 4043 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4044 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4045 4046 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4047 4048 /* Global to local map of received indices */ 4049 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4050 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4051 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4052 4053 /* restore attributes -> type of incoming data and its size */ 4054 buf_size_idxs = 0; 4055 for (i=0;i<n_recvs;i++) { 4056 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4057 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4058 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4059 } 4060 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4061 4062 /* set preallocation */ 4063 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4064 if (!newisdense) { 4065 PetscInt *new_local_nnz=0; 4066 4067 ptr_vals = recv_buffer_vals; 4068 ptr_idxs = recv_buffer_idxs_local; 4069 if (n_recvs) { 4070 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4071 } 4072 for (i=0;i<n_recvs;i++) { 4073 PetscInt j; 4074 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4075 for (j=0;j<*(ptr_idxs+1);j++) { 4076 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4077 } 4078 } else { 4079 /* TODO */ 4080 } 4081 ptr_idxs += olengths_idxs[i]; 4082 } 4083 if (new_local_nnz) { 4084 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4085 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4086 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4087 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4088 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4089 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4090 } else { 4091 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4092 } 4093 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4094 } else { 4095 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4096 } 4097 4098 /* set values */ 4099 ptr_vals = recv_buffer_vals; 4100 ptr_idxs = recv_buffer_idxs_local; 4101 for (i=0;i<n_recvs;i++) { 4102 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4103 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4104 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4105 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4106 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4107 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4108 } else { 4109 /* TODO */ 4110 } 4111 ptr_idxs += olengths_idxs[i]; 4112 ptr_vals += olengths_vals[i]; 4113 } 4114 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4115 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4116 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4117 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4118 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4119 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4120 4121 #if 0 4122 if (!restrict_comm) { /* check */ 4123 Vec lvec,rvec; 4124 PetscReal infty_error; 4125 4126 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4127 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4128 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4129 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4130 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4131 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4132 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4133 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4134 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4135 } 4136 #endif 4137 4138 /* assemble new additional is (if any) */ 4139 if (nis) { 4140 PetscInt **temp_idxs,*count_is,j,psum; 4141 4142 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4143 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4144 ptr_idxs = recv_buffer_idxs_is; 4145 psum = 0; 4146 for (i=0;i<n_recvs;i++) { 4147 for (j=0;j<nis;j++) { 4148 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4149 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4150 psum += plen; 4151 ptr_idxs += plen+1; /* shift pointer to received data */ 4152 } 4153 } 4154 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4155 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4156 for (i=1;i<nis;i++) { 4157 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4158 } 4159 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4160 ptr_idxs = recv_buffer_idxs_is; 4161 for (i=0;i<n_recvs;i++) { 4162 for (j=0;j<nis;j++) { 4163 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4164 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4165 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4166 ptr_idxs += plen+1; /* shift pointer to received data */ 4167 } 4168 } 4169 for (i=0;i<nis;i++) { 4170 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4171 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4172 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4173 } 4174 ierr = PetscFree(count_is);CHKERRQ(ierr); 4175 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4176 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4177 } 4178 /* free workspace */ 4179 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4180 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4181 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4182 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4183 if (isdense) { 4184 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4185 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4186 } else { 4187 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4188 } 4189 if (nis) { 4190 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4191 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4192 } 4193 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4194 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4195 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4196 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4197 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4198 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4199 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4200 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4201 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4202 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4203 ierr = PetscFree(onodes);CHKERRQ(ierr); 4204 if (nis) { 4205 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4206 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4207 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4208 } 4209 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4210 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4211 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4212 for (i=0;i<nis;i++) { 4213 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4214 } 4215 *mat_n = NULL; 4216 } 4217 PetscFunctionReturn(0); 4218 } 4219 4220 /* temporary hack into ksp private data structure */ 4221 #include <petsc/private/kspimpl.h> 4222 4223 #undef __FUNCT__ 4224 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4225 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4226 { 4227 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4228 PC_IS *pcis = (PC_IS*)pc->data; 4229 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4230 MatNullSpace CoarseNullSpace=NULL; 4231 ISLocalToGlobalMapping coarse_islg; 4232 IS coarse_is,*isarray; 4233 PetscInt i,im_active=-1,active_procs=-1; 4234 PetscInt nis,nisdofs,nisneu; 4235 PC pc_temp; 4236 PCType coarse_pc_type; 4237 KSPType coarse_ksp_type; 4238 PetscBool multilevel_requested,multilevel_allowed; 4239 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4240 Mat t_coarse_mat_is; 4241 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4242 PetscMPIInt all_procs; 4243 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4244 PetscBool compute_vecs = PETSC_FALSE; 4245 PetscScalar *array; 4246 PetscErrorCode ierr; 4247 4248 PetscFunctionBegin; 4249 /* Assign global numbering to coarse dofs */ 4250 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4251 PetscInt ocoarse_size; 4252 compute_vecs = PETSC_TRUE; 4253 ocoarse_size = pcbddc->coarse_size; 4254 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4255 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4256 /* see if we can avoid some work */ 4257 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4258 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4259 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4260 PC pc; 4261 PetscBool isbddc; 4262 4263 /* temporary workaround since PCBDDC does not have a reset method so far */ 4264 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4265 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4266 if (isbddc) { 4267 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4268 } 4269 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4270 coarse_reuse = PETSC_FALSE; 4271 } else { /* we can safely reuse already computed coarse matrix */ 4272 coarse_reuse = PETSC_TRUE; 4273 } 4274 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4275 coarse_reuse = PETSC_FALSE; 4276 } 4277 /* reset any subassembling information */ 4278 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4279 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4280 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4281 coarse_reuse = PETSC_TRUE; 4282 } 4283 4284 /* count "active" (i.e. with positive local size) and "void" processes */ 4285 im_active = !!(pcis->n); 4286 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4287 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4288 void_procs = all_procs-active_procs; 4289 csin_type_simple = PETSC_TRUE; 4290 redist = PETSC_FALSE; 4291 if (pcbddc->current_level && void_procs) { 4292 csin_ml = PETSC_TRUE; 4293 ncoarse_ml = void_procs; 4294 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4295 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4296 csin_ds = PETSC_TRUE; 4297 ncoarse_ds = pcbddc->redistribute_coarse; 4298 redist = PETSC_TRUE; 4299 } else { 4300 csin_ds = PETSC_TRUE; 4301 ncoarse_ds = active_procs; 4302 redist = PETSC_TRUE; 4303 } 4304 } else { 4305 csin_ml = PETSC_FALSE; 4306 ncoarse_ml = all_procs; 4307 if (void_procs) { 4308 csin_ds = PETSC_TRUE; 4309 ncoarse_ds = void_procs; 4310 csin_type_simple = PETSC_FALSE; 4311 } else { 4312 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4313 csin_ds = PETSC_TRUE; 4314 ncoarse_ds = pcbddc->redistribute_coarse; 4315 redist = PETSC_TRUE; 4316 } else { 4317 csin_ds = PETSC_FALSE; 4318 ncoarse_ds = all_procs; 4319 } 4320 } 4321 } 4322 4323 /* 4324 test if we can go multilevel: three conditions must be satisfied: 4325 - we have not exceeded the number of levels requested 4326 - we can actually subassemble the active processes 4327 - we can find a suitable number of MPI processes where we can place the subassembled problem 4328 */ 4329 multilevel_allowed = PETSC_FALSE; 4330 multilevel_requested = PETSC_FALSE; 4331 if (pcbddc->current_level < pcbddc->max_levels) { 4332 multilevel_requested = PETSC_TRUE; 4333 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4334 multilevel_allowed = PETSC_FALSE; 4335 } else { 4336 multilevel_allowed = PETSC_TRUE; 4337 } 4338 } 4339 /* determine number of process partecipating to coarse solver */ 4340 if (multilevel_allowed) { 4341 ncoarse = ncoarse_ml; 4342 csin = csin_ml; 4343 redist = PETSC_FALSE; 4344 } else { 4345 ncoarse = ncoarse_ds; 4346 csin = csin_ds; 4347 } 4348 4349 /* creates temporary l2gmap and IS for coarse indexes */ 4350 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4351 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4352 4353 /* creates temporary MATIS object for coarse matrix */ 4354 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4355 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4356 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4357 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4358 #if 0 4359 { 4360 PetscViewer viewer; 4361 char filename[256]; 4362 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4363 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4364 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4365 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4366 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4367 } 4368 #endif 4369 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4370 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4371 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4372 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4373 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4374 4375 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4376 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4377 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4378 const PetscInt *idxs; 4379 ISLocalToGlobalMapping tmap; 4380 4381 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4382 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4383 /* allocate space for temporary storage */ 4384 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4385 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4386 /* allocate for IS array */ 4387 nisdofs = pcbddc->n_ISForDofsLocal; 4388 nisneu = !!pcbddc->NeumannBoundariesLocal; 4389 nis = nisdofs + nisneu; 4390 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4391 /* dofs splitting */ 4392 for (i=0;i<nisdofs;i++) { 4393 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4394 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4395 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4396 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4397 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4398 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4399 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4400 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4401 } 4402 /* neumann boundaries */ 4403 if (pcbddc->NeumannBoundariesLocal) { 4404 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4405 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4406 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4407 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4408 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4409 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4410 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4411 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4412 } 4413 /* free memory */ 4414 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4415 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4416 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4417 } else { 4418 nis = 0; 4419 nisdofs = 0; 4420 nisneu = 0; 4421 isarray = NULL; 4422 } 4423 /* destroy no longer needed map */ 4424 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4425 4426 /* restrict on coarse candidates (if needed) */ 4427 coarse_mat_is = NULL; 4428 if (csin) { 4429 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4430 if (redist) { 4431 PetscMPIInt rank; 4432 PetscInt spc,n_spc_p1,dest[1],destsize; 4433 4434 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4435 spc = active_procs/ncoarse; 4436 n_spc_p1 = active_procs%ncoarse; 4437 if (im_active) { 4438 destsize = 1; 4439 if (rank > n_spc_p1*(spc+1)-1) { 4440 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4441 } else { 4442 dest[0] = rank/(spc+1); 4443 } 4444 } else { 4445 destsize = 0; 4446 } 4447 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4448 } else if (csin_type_simple) { 4449 PetscMPIInt rank; 4450 PetscInt issize,isidx; 4451 4452 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4453 if (im_active) { 4454 issize = 1; 4455 isidx = (PetscInt)rank; 4456 } else { 4457 issize = 0; 4458 isidx = -1; 4459 } 4460 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4461 } else { /* get a suitable subassembling pattern from MATIS code */ 4462 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4463 } 4464 4465 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4466 if (!redist || ncoarse <= void_procs) { 4467 PetscInt ncoarse_cand,tissize,*nisindices; 4468 PetscInt *coarse_candidates; 4469 const PetscInt* tisindices; 4470 4471 /* get coarse candidates' ranks in pc communicator */ 4472 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4473 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4474 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4475 if (!coarse_candidates[i]) { 4476 coarse_candidates[ncoarse_cand++]=i; 4477 } 4478 } 4479 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4480 4481 4482 if (pcbddc->dbg_flag) { 4483 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4484 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4485 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4486 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4487 for (i=0;i<ncoarse_cand;i++) { 4488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4489 } 4490 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4491 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4492 } 4493 /* shift the pattern on coarse candidates */ 4494 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4495 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4496 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4497 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4498 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4499 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4500 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4501 } 4502 if (pcbddc->dbg_flag) { 4503 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4504 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4505 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4506 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4507 } 4508 } 4509 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4510 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4511 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4512 } else { /* this is the last level, so use just receiving processes in subcomm */ 4513 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4514 } 4515 } else { 4516 if (pcbddc->dbg_flag) { 4517 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4520 } 4521 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4522 coarse_mat_is = t_coarse_mat_is; 4523 } 4524 4525 /* create local to global scatters for coarse problem */ 4526 if (compute_vecs) { 4527 PetscInt lrows; 4528 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4529 if (coarse_mat_is) { 4530 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4531 } else { 4532 lrows = 0; 4533 } 4534 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4535 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4536 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4537 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4538 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4539 } 4540 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4541 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4542 4543 /* set defaults for coarse KSP and PC */ 4544 if (multilevel_allowed) { 4545 coarse_ksp_type = KSPRICHARDSON; 4546 coarse_pc_type = PCBDDC; 4547 } else { 4548 coarse_ksp_type = KSPPREONLY; 4549 coarse_pc_type = PCREDUNDANT; 4550 } 4551 4552 /* print some info if requested */ 4553 if (pcbddc->dbg_flag) { 4554 if (!multilevel_allowed) { 4555 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4556 if (multilevel_requested) { 4557 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4558 } else if (pcbddc->max_levels) { 4559 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4560 } 4561 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4562 } 4563 } 4564 4565 /* create the coarse KSP object only once with defaults */ 4566 if (coarse_mat_is) { 4567 MatReuse coarse_mat_reuse; 4568 PetscViewer dbg_viewer = NULL; 4569 if (pcbddc->dbg_flag) { 4570 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4571 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4572 } 4573 if (!pcbddc->coarse_ksp) { 4574 char prefix[256],str_level[16]; 4575 size_t len; 4576 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4577 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4578 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4579 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4580 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4581 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4582 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4583 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4584 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4585 /* prefix */ 4586 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4587 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4588 if (!pcbddc->current_level) { 4589 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4590 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4591 } else { 4592 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4593 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4594 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4595 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4596 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4597 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4598 } 4599 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4600 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4601 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4602 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4603 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4604 /* allow user customization */ 4605 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4606 } 4607 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4608 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4609 if (nisdofs) { 4610 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4611 for (i=0;i<nisdofs;i++) { 4612 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4613 } 4614 } 4615 if (nisneu) { 4616 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4617 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4618 } 4619 4620 /* get some info after set from options */ 4621 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4622 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4623 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4624 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4625 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4626 isbddc = PETSC_FALSE; 4627 } 4628 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4629 if (isredundant) { 4630 KSP inner_ksp; 4631 PC inner_pc; 4632 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4633 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4634 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4635 } 4636 4637 /* assemble coarse matrix */ 4638 if (coarse_reuse) { 4639 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4640 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4641 coarse_mat_reuse = MAT_REUSE_MATRIX; 4642 } else { 4643 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4644 } 4645 if (isbddc || isnn) { 4646 if (pcbddc->coarsening_ratio > 1) { 4647 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4648 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4649 if (pcbddc->dbg_flag) { 4650 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4651 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4652 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4653 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4654 } 4655 } 4656 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4657 } else { 4658 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4659 coarse_mat = coarse_mat_is; 4660 } 4661 } else { 4662 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4663 } 4664 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4665 4666 /* propagate symmetry info of coarse matrix */ 4667 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4668 if (pc->pmat->symmetric_set) { 4669 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4670 } 4671 if (pc->pmat->hermitian_set) { 4672 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4673 } 4674 if (pc->pmat->spd_set) { 4675 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4676 } 4677 /* set operators */ 4678 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4679 if (pcbddc->dbg_flag) { 4680 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4681 } 4682 } else { /* processes non partecipating to coarse solver (if any) */ 4683 coarse_mat = 0; 4684 } 4685 ierr = PetscFree(isarray);CHKERRQ(ierr); 4686 #if 0 4687 { 4688 PetscViewer viewer; 4689 char filename[256]; 4690 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4691 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4692 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4693 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4694 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4695 } 4696 #endif 4697 4698 /* Compute coarse null space (special handling by BDDC only) */ 4699 #if 0 4700 if (pcbddc->NullSpace) { 4701 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4702 } 4703 #endif 4704 4705 if (pcbddc->coarse_ksp) { 4706 Vec crhs,csol; 4707 PetscBool ispreonly; 4708 4709 if (CoarseNullSpace) { 4710 if (isbddc) { 4711 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4712 } else { 4713 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4714 } 4715 } 4716 /* setup coarse ksp */ 4717 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4718 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4719 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4720 /* hack */ 4721 if (!csol) { 4722 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4723 } 4724 if (!crhs) { 4725 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4726 } 4727 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4728 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4729 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4730 KSP check_ksp; 4731 KSPType check_ksp_type; 4732 PC check_pc; 4733 Vec check_vec,coarse_vec; 4734 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4735 PetscInt its; 4736 PetscBool compute_eigs; 4737 PetscReal *eigs_r,*eigs_c; 4738 PetscInt neigs; 4739 const char *prefix; 4740 4741 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4742 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4743 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4744 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4745 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4746 if (ispreonly) { 4747 check_ksp_type = KSPPREONLY; 4748 compute_eigs = PETSC_FALSE; 4749 } else { 4750 check_ksp_type = KSPGMRES; 4751 compute_eigs = PETSC_TRUE; 4752 } 4753 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4754 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4755 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4756 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4757 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4758 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4759 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4760 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4761 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4762 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4763 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4764 /* create random vec */ 4765 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4766 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4767 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4768 if (CoarseNullSpace) { 4769 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4770 } 4771 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4772 /* solve coarse problem */ 4773 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4774 if (CoarseNullSpace) { 4775 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4776 } 4777 /* set eigenvalue estimation if preonly has not been requested */ 4778 if (compute_eigs) { 4779 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4780 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4781 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4782 lambda_max = eigs_r[neigs-1]; 4783 lambda_min = eigs_r[0]; 4784 if (pcbddc->use_coarse_estimates) { 4785 if (lambda_max>lambda_min) { 4786 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4787 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4788 } 4789 } 4790 } 4791 4792 /* check coarse problem residual error */ 4793 if (pcbddc->dbg_flag) { 4794 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4795 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4796 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4797 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4798 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4799 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4800 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4801 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4802 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4803 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4804 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4805 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4806 if (compute_eigs) { 4807 PetscReal lambda_max_s,lambda_min_s; 4808 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4809 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4810 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4811 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4812 for (i=0;i<neigs;i++) { 4813 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4814 } 4815 } 4816 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4817 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4818 } 4819 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4820 if (compute_eigs) { 4821 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4822 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4823 } 4824 } 4825 } 4826 /* print additional info */ 4827 if (pcbddc->dbg_flag) { 4828 /* waits until all processes reaches this point */ 4829 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4830 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4831 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4832 } 4833 4834 /* free memory */ 4835 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4836 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4837 PetscFunctionReturn(0); 4838 } 4839 4840 #undef __FUNCT__ 4841 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4842 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4843 { 4844 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4845 PC_IS* pcis = (PC_IS*)pc->data; 4846 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4847 IS subset,subset_mult,subset_n; 4848 PetscInt local_size,coarse_size=0; 4849 PetscInt *local_primal_indices=NULL; 4850 const PetscInt *t_local_primal_indices; 4851 PetscErrorCode ierr; 4852 4853 PetscFunctionBegin; 4854 /* Compute global number of coarse dofs */ 4855 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4856 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4857 } 4858 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4859 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4860 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4861 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4862 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4863 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4864 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4865 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4866 if (local_size != pcbddc->local_primal_size) { 4867 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4868 } 4869 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4870 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4871 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4872 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4873 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4874 4875 /* check numbering */ 4876 if (pcbddc->dbg_flag) { 4877 PetscScalar coarsesum,*array,*array2; 4878 PetscInt i; 4879 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4880 4881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4882 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4883 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4884 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4885 /* counter */ 4886 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4887 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4888 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4889 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4890 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4891 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4892 4893 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4894 for (i=0;i<pcbddc->local_primal_size;i++) { 4895 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4896 } 4897 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4898 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4899 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4900 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4901 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4902 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4903 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4904 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4905 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4906 for (i=0;i<pcis->n;i++) { 4907 if (array[i] != 0.0 && array[i] != array2[i]) { 4908 PetscInt owned = (PetscInt)(array[i]); 4909 PetscInt neigh = (PetscInt)(array2[i]); 4910 set_error = PETSC_TRUE; 4911 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4912 } 4913 } 4914 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4915 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4916 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4917 for (i=0;i<pcis->n;i++) { 4918 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4919 } 4920 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4921 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4922 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4923 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4924 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4925 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4926 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4927 PetscInt *gidxs; 4928 4929 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4930 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4931 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4932 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4933 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4934 for (i=0;i<pcbddc->local_primal_size;i++) { 4935 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4936 } 4937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4938 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4939 } 4940 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4941 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4942 } 4943 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4944 /* get back data */ 4945 *coarse_size_n = coarse_size; 4946 *local_primal_indices_n = local_primal_indices; 4947 PetscFunctionReturn(0); 4948 } 4949 4950 #undef __FUNCT__ 4951 #define __FUNCT__ "PCBDDCGlobalToLocal" 4952 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4953 { 4954 IS localis_t; 4955 PetscInt i,lsize,*idxs,n; 4956 PetscScalar *vals; 4957 PetscErrorCode ierr; 4958 4959 PetscFunctionBegin; 4960 /* get indices in local ordering exploiting local to global map */ 4961 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4962 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4963 for (i=0;i<lsize;i++) vals[i] = 1.0; 4964 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4965 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4966 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4967 if (idxs) { /* multilevel guard */ 4968 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4969 } 4970 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4971 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4972 ierr = PetscFree(vals);CHKERRQ(ierr); 4973 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4974 /* now compute set in local ordering */ 4975 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4976 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4977 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4978 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4979 for (i=0,lsize=0;i<n;i++) { 4980 if (PetscRealPart(vals[i]) > 0.5) { 4981 lsize++; 4982 } 4983 } 4984 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4985 for (i=0,lsize=0;i<n;i++) { 4986 if (PetscRealPart(vals[i]) > 0.5) { 4987 idxs[lsize++] = i; 4988 } 4989 } 4990 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4991 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4992 *localis = localis_t; 4993 PetscFunctionReturn(0); 4994 } 4995 4996 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4997 #undef __FUNCT__ 4998 #define __FUNCT__ "PCBDDCMatMult_Private" 4999 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5000 { 5001 PCBDDCChange_ctx change_ctx; 5002 PetscErrorCode ierr; 5003 5004 PetscFunctionBegin; 5005 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5006 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5007 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5008 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5009 PetscFunctionReturn(0); 5010 } 5011 5012 #undef __FUNCT__ 5013 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5014 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5015 { 5016 PCBDDCChange_ctx change_ctx; 5017 PetscErrorCode ierr; 5018 5019 PetscFunctionBegin; 5020 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5021 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5022 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5023 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5024 PetscFunctionReturn(0); 5025 } 5026 5027 #undef __FUNCT__ 5028 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5029 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5030 { 5031 PC_IS *pcis=(PC_IS*)pc->data; 5032 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5033 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5034 Mat S_j; 5035 PetscInt *used_xadj,*used_adjncy; 5036 PetscBool free_used_adj; 5037 PetscErrorCode ierr; 5038 5039 PetscFunctionBegin; 5040 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5041 free_used_adj = PETSC_FALSE; 5042 if (pcbddc->sub_schurs_layers == -1) { 5043 used_xadj = NULL; 5044 used_adjncy = NULL; 5045 } else { 5046 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5047 used_xadj = pcbddc->mat_graph->xadj; 5048 used_adjncy = pcbddc->mat_graph->adjncy; 5049 } else if (pcbddc->computed_rowadj) { 5050 used_xadj = pcbddc->mat_graph->xadj; 5051 used_adjncy = pcbddc->mat_graph->adjncy; 5052 } else { 5053 PetscBool flg_row=PETSC_FALSE; 5054 const PetscInt *xadj,*adjncy; 5055 PetscInt nvtxs; 5056 5057 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5058 if (flg_row) { 5059 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5060 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5061 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5062 free_used_adj = PETSC_TRUE; 5063 } else { 5064 pcbddc->sub_schurs_layers = -1; 5065 used_xadj = NULL; 5066 used_adjncy = NULL; 5067 } 5068 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5069 } 5070 } 5071 5072 /* setup sub_schurs data */ 5073 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5074 if (!sub_schurs->use_mumps) { 5075 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5076 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5077 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5078 } else { 5079 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5080 PetscBool isseqaij; 5081 if (!pcbddc->use_vertices && reuse_solvers) { 5082 PetscInt n_vertices; 5083 5084 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5085 reuse_solvers = (PetscBool)!n_vertices; 5086 } 5087 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5088 if (!isseqaij) { 5089 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5090 if (matis->A == pcbddc->local_mat) { 5091 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5092 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5093 } else { 5094 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5095 } 5096 } 5097 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5098 } 5099 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5100 5101 /* free adjacency */ 5102 if (free_used_adj) { 5103 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5104 } 5105 PetscFunctionReturn(0); 5106 } 5107 5108 #undef __FUNCT__ 5109 #define __FUNCT__ "PCBDDCInitSubSchurs" 5110 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5111 { 5112 PC_IS *pcis=(PC_IS*)pc->data; 5113 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5114 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5115 PCBDDCGraph graph; 5116 PetscErrorCode ierr; 5117 5118 PetscFunctionBegin; 5119 /* attach interface graph for determining subsets */ 5120 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5121 IS verticesIS,verticescomm; 5122 PetscInt vsize,*idxs; 5123 5124 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5125 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5126 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5127 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5128 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5129 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5130 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5131 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5132 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5133 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5134 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5135 /* 5136 if (pcbddc->dbg_flag) { 5137 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5138 } 5139 */ 5140 } else { 5141 graph = pcbddc->mat_graph; 5142 } 5143 5144 /* sub_schurs init */ 5145 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5146 5147 /* free graph struct */ 5148 if (pcbddc->sub_schurs_rebuild) { 5149 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5150 } 5151 PetscFunctionReturn(0); 5152 } 5153