1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0" 10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 if (!pcbddc->benign_sf) { 17 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 18 if (pcbddc->benign_p0_gidx >= 0) { 19 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 20 } else { 21 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 22 } 23 } 24 if (pop) { /* use SF to get values */ 25 PetscScalar *array; 26 27 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 28 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 29 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 30 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 31 } else { /* use VecSetValue */ 32 if (pcbddc->benign_p0_gidx >= 0) { 33 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 34 } 35 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 36 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 43 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 44 { 45 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 46 PetscErrorCode ierr; 47 48 PetscFunctionBegin; 49 /* TODO: add error checking 50 - avoid nested pop (or push) calls. 51 - cannot push before pop. 52 */ 53 if (pcbddc->benign_p0_lidx < 0) { 54 PetscFunctionReturn(0); 55 } 56 if (pop) { 57 const PetscInt *cB0_cols; 58 PetscInt cB0_ncol; 59 const PetscScalar *cB0_vals; 60 61 /* extract B_0 */ 62 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 63 pcbddc->B0_ncol = cB0_ncol; 64 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 65 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 66 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 67 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 68 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 69 /* remove rows and cols from local problem */ 70 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 71 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 72 } else { /* push */ 73 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 74 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 75 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 76 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 77 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78 } 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCAdaptiveSelection" 84 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 85 { 86 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 87 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 88 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 89 PetscBLASInt *B_iwork,*B_ifail; 90 PetscScalar *work,lwork; 91 PetscScalar *St,*S,*eigv; 92 PetscScalar *Sarray,*Starray; 93 PetscReal *eigs,thresh; 94 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 95 PetscBool allocated_S_St; 96 #if defined(PETSC_USE_COMPLEX) 97 PetscReal *rwork; 98 #endif 99 PetscErrorCode ierr; 100 101 PetscFunctionBegin; 102 if (!sub_schurs->use_mumps) { 103 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 104 } 105 106 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 107 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 108 } 109 110 if (pcbddc->dbg_flag) { 111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 112 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 114 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 115 } 116 117 if (pcbddc->dbg_flag) { 118 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 119 } 120 121 /* max size of subsets */ 122 mss = 0; 123 for (i=0;i<sub_schurs->n_subs;i++) { 124 PetscInt subset_size; 125 126 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 127 mss = PetscMax(mss,subset_size); 128 } 129 130 /* min/max and threshold */ 131 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 132 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 133 nmax = PetscMax(nmin,nmax); 134 allocated_S_St = PETSC_FALSE; 135 if (nmin) { 136 allocated_S_St = PETSC_TRUE; 137 } 138 139 /* allocate lapack workspace */ 140 cum = cum2 = 0; 141 maxneigs = 0; 142 for (i=0;i<sub_schurs->n_subs;i++) { 143 PetscInt n,subset_size; 144 145 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 146 n = PetscMin(subset_size,nmax); 147 cum += subset_size; 148 cum2 += subset_size*n; 149 maxneigs = PetscMax(maxneigs,n); 150 } 151 if (mss) { 152 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 153 PetscBLASInt B_itype = 1; 154 PetscBLASInt B_N = mss; 155 PetscReal zero = 0.0; 156 PetscReal eps = 0.0; /* dlamch? */ 157 158 B_lwork = -1; 159 S = NULL; 160 St = NULL; 161 eigs = NULL; 162 eigv = NULL; 163 B_iwork = NULL; 164 B_ifail = NULL; 165 #if defined(PETSC_USE_COMPLEX) 166 rwork = NULL; 167 #endif 168 thresh = 1.0; 169 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 170 #if defined(PETSC_USE_COMPLEX) 171 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 172 #else 173 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 174 #endif 175 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 176 ierr = PetscFPTrapPop();CHKERRQ(ierr); 177 } else { 178 /* TODO */ 179 } 180 } else { 181 lwork = 0; 182 } 183 184 nv = 0; 185 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 186 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 187 } 188 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 189 if (allocated_S_St) { 190 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 191 } 192 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 193 #if defined(PETSC_USE_COMPLEX) 194 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 195 #endif 196 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 197 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 198 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 199 nv+cum,&pcbddc->adaptive_constraints_idxs, 200 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 201 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 202 203 maxneigs = 0; 204 cum = cum2 = cumarray = 0; 205 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 206 pcbddc->adaptive_constraints_data_ptr[0] = 0; 207 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 208 const PetscInt *idxs; 209 210 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 211 for (cum=0;cum<nv;cum++) { 212 pcbddc->adaptive_constraints_n[cum] = 1; 213 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 214 pcbddc->adaptive_constraints_data[cum] = 1.0; 215 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 216 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 217 } 218 cum2 = cum; 219 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 220 } 221 222 if (mss) { /* multilevel */ 223 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 224 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 225 } 226 227 for (i=0;i<sub_schurs->n_subs;i++) { 228 229 const PetscInt *idxs; 230 PetscReal infty = PETSC_MAX_REAL; 231 PetscInt j,subset_size,eigs_start = 0; 232 PetscBLASInt B_N; 233 PetscBool same_data = PETSC_FALSE; 234 235 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 236 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 237 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 238 if (sub_schurs->is_hermitian) { 239 PetscInt j,k; 240 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 241 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 242 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 } 244 for (j=0;j<subset_size;j++) { 245 for (k=j;k<subset_size;k++) { 246 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 247 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 248 } 249 } 250 } else { 251 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 252 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 } 254 } else { 255 S = Sarray + cumarray; 256 St = Starray + cumarray; 257 } 258 259 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 260 /* see if we can save some work */ 261 if (sub_schurs->n_subs == 1) { 262 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 263 } 264 265 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 266 B_neigs = 0; 267 } else { 268 /* Threshold: this is an heuristic for edges */ 269 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 270 271 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 272 PetscBLASInt B_itype = 1; 273 PetscBLASInt B_IL, B_IU; 274 PetscReal eps = -1.0; /* dlamch? */ 275 PetscInt nmin_s; 276 277 /* ask for eigenvalues larger than thresh */ 278 if (pcbddc->dbg_flag) { 279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 280 } 281 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 282 #if defined(PETSC_USE_COMPLEX) 283 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 284 #else 285 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 286 #endif 287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 288 if (B_ierr) { 289 if (B_ierr < 0 ) { 290 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 291 } else if (B_ierr <= B_N) { 292 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 293 } else { 294 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 295 } 296 } 297 298 if (B_neigs > nmax) { 299 if (pcbddc->dbg_flag) { 300 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 301 } 302 eigs_start = B_neigs -nmax; 303 B_neigs = nmax; 304 } 305 306 nmin_s = PetscMin(nmin,B_N); 307 if (B_neigs < nmin_s) { 308 PetscBLASInt B_neigs2; 309 310 B_IU = B_N - B_neigs; 311 B_IL = B_N - nmin_s + 1; 312 if (pcbddc->dbg_flag) { 313 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 314 } 315 if (sub_schurs->is_hermitian) { 316 PetscInt j; 317 for (j=0;j<subset_size;j++) { 318 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 319 } 320 for (j=0;j<subset_size;j++) { 321 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 322 } 323 } else { 324 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 325 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 326 } 327 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 330 #else 331 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 332 #endif 333 ierr = PetscFPTrapPop();CHKERRQ(ierr); 334 B_neigs += B_neigs2; 335 } 336 if (B_ierr) { 337 if (B_ierr < 0 ) { 338 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 339 } else if (B_ierr <= B_N) { 340 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 341 } else { 342 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 343 } 344 } 345 if (pcbddc->dbg_flag) { 346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 347 for (j=0;j<B_neigs;j++) { 348 if (eigs[j] == 0.0) { 349 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 352 } 353 } 354 } 355 } else { 356 /* TODO */ 357 } 358 } 359 maxneigs = PetscMax(B_neigs,maxneigs); 360 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 361 if (B_neigs) { 362 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 363 364 if (pcbddc->dbg_flag > 1) { 365 PetscInt ii; 366 for (ii=0;ii<B_neigs;ii++) { 367 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 368 for (j=0;j<B_N;j++) { 369 #if defined(PETSC_USE_COMPLEX) 370 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 371 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 372 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 373 #else 374 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 375 #endif 376 } 377 } 378 } 379 #if 0 380 for (j=0;j<B_neigs;j++) { 381 PetscBLASInt Blas_N,Blas_one = 1.0; 382 PetscScalar norm; 383 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 384 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 385 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 386 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 387 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 388 } else { 389 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 390 } 391 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 392 } 393 #endif 394 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 395 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 396 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 397 cum++; 398 } 399 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 400 /* shift for next computation */ 401 cumarray += subset_size*subset_size; 402 } 403 if (pcbddc->dbg_flag) { 404 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 405 } 406 407 if (mss) { 408 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 409 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 410 /* destroy matrices (junk) */ 411 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 412 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 413 } 414 if (allocated_S_St) { 415 ierr = PetscFree2(S,St);CHKERRQ(ierr); 416 } 417 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 418 #if defined(PETSC_USE_COMPLEX) 419 ierr = PetscFree(rwork);CHKERRQ(ierr); 420 #endif 421 if (pcbddc->dbg_flag) { 422 PetscInt maxneigs_r; 423 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "PCBDDCSetUpSolvers" 431 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 432 { 433 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 434 PetscScalar *coarse_submat_vals; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 /* Setup local scatters R_to_B and (optionally) R_to_D */ 439 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 440 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 441 442 /* Setup local neumann solver ksp_R */ 443 /* PCBDDCSetUpLocalScatters should be called first! */ 444 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 445 446 /* Change global null space passed in by the user if change of basis has been requested */ 447 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 448 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 449 } 450 451 /* 452 Setup local correction and local part of coarse basis. 453 Gives back the dense local part of the coarse matrix in column major ordering 454 */ 455 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 456 457 /* Compute total number of coarse nodes and setup coarse solver */ 458 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 459 460 /* free */ 461 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 #undef __FUNCT__ 466 #define __FUNCT__ "PCBDDCResetCustomization" 467 PetscErrorCode PCBDDCResetCustomization(PC pc) 468 { 469 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 470 PetscErrorCode ierr; 471 472 PetscFunctionBegin; 473 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 474 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 475 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 476 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 477 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 478 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 479 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 480 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 481 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 482 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 483 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "PCBDDCResetTopography" 489 PetscErrorCode PCBDDCResetTopography(PC pc) 490 { 491 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 496 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 497 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 498 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 499 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "PCBDDCResetSolvers" 505 PetscErrorCode PCBDDCResetSolvers(PC pc) 506 { 507 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 508 PetscScalar *array; 509 PetscErrorCode ierr; 510 511 PetscFunctionBegin; 512 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 513 if (pcbddc->coarse_phi_B) { 514 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 515 ierr = PetscFree(array);CHKERRQ(ierr); 516 } 517 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 518 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 519 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 520 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 521 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 523 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 524 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 525 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 526 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 527 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 528 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 529 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 530 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 531 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 532 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 533 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 534 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 535 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 536 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 537 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 538 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 539 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 540 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 541 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 542 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 543 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 549 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 550 { 551 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 552 PC_IS *pcis = (PC_IS*)pc->data; 553 VecType impVecType; 554 PetscInt n_constraints,n_R,old_size,n_benign; 555 PetscErrorCode ierr; 556 557 PetscFunctionBegin; 558 if (!pcbddc->ConstraintMatrix) { 559 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 560 } 561 /* get sizes */ 562 n_benign = 0; 563 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 564 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 565 if (n_benign && n_constraints) { 566 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis"); 567 } 568 n_R = pcis->n - pcbddc->n_vertices; 569 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 570 /* local work vectors (try to avoid unneeded work)*/ 571 /* R nodes */ 572 old_size = -1; 573 if (pcbddc->vec1_R) { 574 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 575 } 576 if (n_R != old_size) { 577 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 578 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 579 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 580 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 581 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 582 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 583 } 584 /* local primal dofs */ 585 old_size = -1; 586 if (pcbddc->vec1_P) { 587 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 588 } 589 if (pcbddc->local_primal_size != old_size) { 590 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 591 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 592 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 593 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 594 } 595 /* local explicit constraints */ 596 old_size = -1; 597 if (pcbddc->vec1_C) { 598 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 599 } 600 if (n_constraints && n_constraints != old_size) { 601 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 602 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 603 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 604 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 605 } 606 PetscFunctionReturn(0); 607 } 608 609 #undef __FUNCT__ 610 #define __FUNCT__ "PCBDDCSetUpCorrection" 611 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 612 { 613 PetscErrorCode ierr; 614 /* pointers to pcis and pcbddc */ 615 PC_IS* pcis = (PC_IS*)pc->data; 616 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 617 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 618 /* submatrices of local problem */ 619 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 620 /* submatrices of benign trick */ 621 Mat B0_R,B0_V; 622 /* submatrices of local coarse problem */ 623 Mat S_VV,S_CV,S_VC,S_CC; 624 /* working matrices */ 625 Mat C_CR; 626 /* additional working stuff */ 627 PC pc_R; 628 Mat F,B0 = NULL; 629 Vec vec_R_benign,vec_R_benign_2; 630 PetscBool isLU,isCHOL,isILU; 631 632 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 633 PetscScalar *work; 634 PetscInt *idx_V_B; 635 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 636 PetscInt i,n_R,n_D,n_B; 637 PetscBool unsymmetric_check; 638 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 639 MatType impMatType; 640 /* some shortcuts to scalars */ 641 PetscScalar one=1.0,m_one=-1.0; 642 643 PetscFunctionBegin; 644 n_benign = 0; 645 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 646 n_vertices = pcbddc->n_vertices; 647 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 648 /* Set Non-overlapping dimensions */ 649 n_B = pcis->n_B; 650 n_D = pcis->n - n_B; 651 n_R = pcis->n - n_vertices; 652 653 /* Set types for local objects needed by BDDC precondtioner */ 654 impMatType = MATSEQDENSE; 655 656 /* vertices in boundary numbering */ 657 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 658 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 659 if (i != n_vertices) { 660 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 661 } 662 663 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 664 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 665 if (!n_benign) { 666 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 667 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 668 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 669 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 670 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 671 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 672 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 673 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 674 } else { /* these wrappers are not needed with the benign trick */ 675 S_VV = S_CV = S_VC = S_CC = NULL; 676 } 677 678 unsymmetric_check = PETSC_FALSE; 679 /* allocate workspace */ 680 n = 0; 681 if (n_constraints) { 682 n += n_R*n_constraints; 683 } 684 if (n_vertices) { 685 n = PetscMax(2*n_R*n_vertices,n); 686 n = PetscMax((n_R+n_B)*n_vertices,n); 687 } 688 if (!pcbddc->symmetric_primal) { 689 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 690 unsymmetric_check = PETSC_TRUE; 691 } 692 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 693 694 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 695 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 696 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 697 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 698 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 699 if (isLU || isILU || isCHOL) { 700 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 701 } else if (sub_schurs->reuse_mumps) { 702 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 703 MatFactorType type; 704 705 F = reuse_mumps->F; 706 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 707 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 708 } else { 709 F = NULL; 710 } 711 712 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 713 if (n_constraints) { 714 Mat M1,M2,M3; 715 Mat auxmat; 716 IS is_aux; 717 PetscScalar *array,*array2; 718 719 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 720 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 721 722 /* Extract constraints on R nodes: C_{CR} */ 723 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 724 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 725 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 726 727 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 728 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 729 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 730 for (i=0;i<n_constraints;i++) { 731 const PetscScalar *row_cmat_values; 732 const PetscInt *row_cmat_indices; 733 PetscInt size_of_constraint,j; 734 735 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 736 for (j=0;j<size_of_constraint;j++) { 737 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 738 } 739 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 740 } 741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 742 if (F) { 743 Mat B; 744 745 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 746 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 747 ierr = MatDestroy(&B);CHKERRQ(ierr); 748 } else { 749 PetscScalar *marr; 750 751 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 752 for (i=0;i<n_constraints;i++) { 753 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 754 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 755 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 756 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 757 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 758 } 759 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 760 } 761 if (!pcbddc->switch_static) { 762 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 763 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 764 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 765 for (i=0;i<n_constraints;i++) { 766 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 767 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 768 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 769 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 771 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 772 } 773 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 774 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 775 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 776 } else { 777 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 778 pcbddc->local_auxmat2 = local_auxmat2_R; 779 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 780 } 781 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 782 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 783 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 784 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 785 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 786 if (isCHOL) { 787 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 788 } else { 789 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 790 } 791 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 792 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 793 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 794 ierr = MatDestroy(&M2);CHKERRQ(ierr); 795 ierr = MatDestroy(&M3);CHKERRQ(ierr); 796 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 797 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 798 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 799 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 800 ierr = MatDestroy(&M1);CHKERRQ(ierr); 801 } 802 /* Get submatrices from subdomain matrix */ 803 if (n_vertices) { 804 IS is_aux; 805 806 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 807 IS tis; 808 809 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 810 ierr = ISSort(tis);CHKERRQ(ierr); 811 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 812 ierr = ISDestroy(&tis);CHKERRQ(ierr); 813 } else { 814 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 815 } 816 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 817 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 818 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 819 if (n_benign) { 820 IS dummy; 821 PetscInt ii[2]; 822 823 ii[0] = 0; 824 ii[1] = pcbddc->B0_ncol; 825 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 826 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 827 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 828 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 829 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 830 } 831 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 832 } 833 834 /* Matrix of coarse basis functions (local) */ 835 if (pcbddc->coarse_phi_B) { 836 PetscInt on_B,on_primal,on_D=n_D; 837 if (pcbddc->coarse_phi_D) { 838 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 839 } 840 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 841 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 842 PetscScalar *marray; 843 844 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 845 ierr = PetscFree(marray);CHKERRQ(ierr); 846 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 847 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 848 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 849 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 850 } 851 } 852 853 if (!pcbddc->coarse_phi_B) { 854 PetscScalar *marray; 855 856 n = n_B*pcbddc->local_primal_size; 857 if (pcbddc->switch_static || pcbddc->dbg_flag) { 858 n += n_D*pcbddc->local_primal_size; 859 } 860 if (!pcbddc->symmetric_primal) { 861 n *= 2; 862 } 863 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 864 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 865 n = n_B*pcbddc->local_primal_size; 866 if (pcbddc->switch_static || pcbddc->dbg_flag) { 867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 868 n += n_D*pcbddc->local_primal_size; 869 } 870 if (!pcbddc->symmetric_primal) { 871 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 872 if (pcbddc->switch_static || pcbddc->dbg_flag) { 873 n = n_B*pcbddc->local_primal_size; 874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 875 } 876 } else { 877 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 878 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 879 if (pcbddc->switch_static || pcbddc->dbg_flag) { 880 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 881 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 882 } 883 } 884 } 885 886 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 887 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 888 const PetscInt *idxs; 889 890 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 891 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 892 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 893 } 894 /* vertices */ 895 if (n_vertices) { 896 897 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 898 899 if (n_R) { 900 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 901 Mat S_V_benign; 902 PetscBLASInt B_N,B_one = 1; 903 PetscScalar *x,*y; 904 PetscBool isseqaij; 905 906 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 907 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 908 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 909 if (F) { /* TODO could be optimized for symmetric problems */ 910 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 911 } else { 912 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 913 for (i=0;i<n_vertices;i++) { 914 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 915 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 916 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 917 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 918 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 919 } 920 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 921 } 922 if (n_benign) { 923 const PetscScalar *vals; 924 const PetscInt *idxs; 925 PetscInt n; 926 927 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr); 928 ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign_2);CHKERRQ(ierr); 929 ierr = VecSet(vec_R_benign_2,0.);CHKERRQ(ierr); 930 ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 931 ierr = VecSetValues(vec_R_benign_2,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 932 ierr = VecAssemblyBegin(vec_R_benign_2);CHKERRQ(ierr); 933 ierr = VecAssemblyEnd(vec_R_benign_2);CHKERRQ(ierr); 934 ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr); 935 ierr = KSPSolve(pcbddc->ksp_R,vec_R_benign_2,vec_R_benign);CHKERRQ(ierr); 936 ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr); 937 } 938 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 939 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 940 if (n_constraints) { 941 Mat B; 942 943 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 944 for (i=0;i<n_vertices;i++) { 945 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 946 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 947 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 949 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 950 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 951 } 952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 953 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 954 ierr = MatDestroy(&B);CHKERRQ(ierr); 955 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 956 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 957 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 958 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 959 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 960 ierr = MatDestroy(&B);CHKERRQ(ierr); 961 } 962 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 963 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 964 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 965 } 966 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 967 if (n_benign) { 968 ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr); 969 } 970 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 971 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 972 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 973 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 974 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 975 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 976 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 977 if (!n_benign) { 978 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 979 } else { 980 const PetscScalar *vals; 981 PetscScalar val; 982 const PetscInt *idxs; 983 PetscInt n,j; 984 985 /* Use explicit loops to populate the local coarse matrix */ 986 ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr); 987 for (j=0;j<n_vertices;j++) { 988 for (i=0;i<n_vertices;i++) { 989 coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i]; 990 } 991 } 992 ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr); 993 ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr); 994 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 995 for (j=0;j<n_vertices;j++) { 996 coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j]; 997 coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j]; 998 } 999 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1000 ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr); 1001 ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr); 1002 ierr = VecDot(vec_R_benign_2,vec_R_benign,&val);CHKERRQ(ierr); 1003 coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = val; 1004 } 1005 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1006 } else { 1007 /* TODO benign case also here */ 1008 if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented"); 1009 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1010 } 1011 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1012 /* coarse basis functions */ 1013 for (i=0;i<n_vertices;i++) { 1014 PetscScalar *y; 1015 1016 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1017 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1018 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1019 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1020 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1021 y[n_B*i+idx_V_B[i]] = 1.0; 1022 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1023 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1024 1025 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1026 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1027 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1031 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1032 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1033 } 1034 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1035 } 1036 if (n_benign) { 1037 PetscScalar *y; 1038 1039 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1040 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr); 1041 ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1044 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1045 1046 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1047 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1048 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*n_vertices);CHKERRQ(ierr); 1049 ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1050 ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1051 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1052 y[n_D*n_vertices+p0_lidx_I] = 0.0; 1053 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1054 } 1055 } 1056 /* if n_R == 0 the object is not destroyed */ 1057 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1058 } 1059 1060 if (n_constraints) { 1061 Mat B; 1062 1063 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1064 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1065 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1066 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1067 if (n_vertices) { 1068 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1069 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1070 } else { 1071 Mat S_VCt; 1072 1073 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1074 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1075 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1076 } 1077 } 1078 ierr = MatDestroy(&B);CHKERRQ(ierr); 1079 /* coarse basis functions */ 1080 for (i=0;i<n_constraints;i++) { 1081 PetscScalar *y; 1082 1083 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1084 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1085 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1086 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1089 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1090 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1091 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1092 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1093 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1094 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1095 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1096 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1097 } 1098 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1099 } 1100 } 1101 if (n_constraints) { 1102 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1103 } 1104 1105 if (n_benign) { 1106 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1107 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1108 ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr); 1109 } 1110 1111 /* compute other basis functions for non-symmetric problems */ 1112 if (!pcbddc->symmetric_primal) { 1113 1114 if (n_constraints) { 1115 Mat S_CCT,B_C; 1116 1117 /* this is a lazy thing */ 1118 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1119 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1120 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1121 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1122 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1123 if (n_vertices) { 1124 Mat B_V,S_VCT; 1125 1126 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1127 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1128 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1129 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1130 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1131 } 1132 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1133 } else { /* if there are no constraints, reset work */ 1134 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1135 } 1136 if (n_vertices && n_R) { 1137 Mat A_VRT; 1138 PetscScalar *marray; 1139 PetscBLASInt B_N,B_one = 1; 1140 1141 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1142 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1143 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1144 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1145 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1146 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1147 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1148 } 1149 1150 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1151 for (i=0;i<pcbddc->local_primal_size;i++) { 1152 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1153 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1154 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1155 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1156 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1157 } 1158 } else { 1159 for (i=0;i<pcbddc->local_primal_size;i++) { 1160 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1161 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1162 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1163 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1164 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1165 } 1166 } 1167 /* coarse basis functions */ 1168 for (i=0;i<pcbddc->local_primal_size;i++) { 1169 PetscScalar *y; 1170 1171 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1172 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1173 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1174 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1175 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 if (i<n_vertices) { 1177 y[n_B*i+idx_V_B[i]] = 1.0; 1178 } 1179 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1180 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1181 1182 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1183 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1184 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1185 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1186 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1187 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1188 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1189 } 1190 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1191 } 1192 } 1193 /* free memory */ 1194 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1195 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1196 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1197 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1198 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1199 ierr = PetscFree(work);CHKERRQ(ierr); 1200 if (n_vertices) { 1201 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1202 } 1203 if (n_constraints) { 1204 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1205 } 1206 /* Checking coarse_sub_mat and coarse basis functios */ 1207 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1208 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1209 if (pcbddc->dbg_flag) { 1210 Mat coarse_sub_mat; 1211 Mat AUXMAT,TM1,TM2,TM3,TM4; 1212 Mat coarse_phi_D,coarse_phi_B; 1213 Mat coarse_psi_D,coarse_psi_B; 1214 Mat A_II,A_BB,A_IB,A_BI; 1215 Mat C_B,CPHI; 1216 IS is_dummy; 1217 Vec mones; 1218 MatType checkmattype=MATSEQAIJ; 1219 PetscReal real_value; 1220 1221 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1222 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1223 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1224 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1225 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1226 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1227 if (unsymmetric_check) { 1228 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1229 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1230 } 1231 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1232 1233 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1234 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1235 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1236 if (unsymmetric_check) { 1237 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1238 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1239 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1240 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1241 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1242 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1243 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1244 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1245 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1246 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1247 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1248 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1249 } else { 1250 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1251 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1252 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1253 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1254 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1255 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1256 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1257 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1258 } 1259 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1260 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1261 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1262 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1263 if (n_benign) { 1264 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1265 PetscScalar *data,*data2; 1266 1267 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1268 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1269 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1270 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1271 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1272 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1273 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1274 for (i=0;i<pcbddc->local_primal_size;i++) { 1275 data[n_vertices*pcbddc->local_primal_size+i] += data2[i]; 1276 data[i*pcbddc->local_primal_size+n_vertices] += data2[i]; 1277 } 1278 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1279 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1280 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1281 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1282 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1283 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1284 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1285 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1286 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1287 } 1288 #if 0 1289 { 1290 PetscViewer viewer; 1291 char filename[256]; 1292 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1293 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1294 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1295 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1296 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1297 } 1298 #endif 1299 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1300 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1301 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1302 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1303 1304 /* check constraints */ 1305 if (!n_benign) { 1306 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1307 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1308 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1309 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1310 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1311 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1312 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1313 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1314 if (unsymmetric_check) { 1315 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1316 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1317 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1318 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1319 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1320 } 1321 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1322 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1323 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1324 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1325 } 1326 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1327 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1328 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1329 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1330 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1331 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1332 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1333 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1334 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1335 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1336 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1337 if (unsymmetric_check) { 1338 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1339 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1340 } 1341 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1342 } 1343 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1344 /* get back data */ 1345 *coarse_submat_vals_n = coarse_submat_vals; 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1351 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1352 { 1353 Mat *work_mat; 1354 IS isrow_s,iscol_s; 1355 PetscBool rsorted,csorted; 1356 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1357 PetscErrorCode ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1361 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1362 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1363 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1364 1365 if (!rsorted) { 1366 const PetscInt *idxs; 1367 PetscInt *idxs_sorted,i; 1368 1369 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1370 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1371 for (i=0;i<rsize;i++) { 1372 idxs_perm_r[i] = i; 1373 } 1374 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1375 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1376 for (i=0;i<rsize;i++) { 1377 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1378 } 1379 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1380 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1383 isrow_s = isrow; 1384 } 1385 1386 if (!csorted) { 1387 if (isrow == iscol) { 1388 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1389 iscol_s = isrow_s; 1390 } else { 1391 const PetscInt *idxs; 1392 PetscInt *idxs_sorted,i; 1393 1394 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1395 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1396 for (i=0;i<csize;i++) { 1397 idxs_perm_c[i] = i; 1398 } 1399 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1400 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1401 for (i=0;i<csize;i++) { 1402 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1403 } 1404 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1405 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1406 } 1407 } else { 1408 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1409 iscol_s = iscol; 1410 } 1411 1412 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1413 1414 if (!rsorted || !csorted) { 1415 Mat new_mat; 1416 IS is_perm_r,is_perm_c; 1417 1418 if (!rsorted) { 1419 PetscInt *idxs_r,i; 1420 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1421 for (i=0;i<rsize;i++) { 1422 idxs_r[idxs_perm_r[i]] = i; 1423 } 1424 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1425 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1426 } else { 1427 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1428 } 1429 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1430 1431 if (!csorted) { 1432 if (isrow_s == iscol_s) { 1433 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1434 is_perm_c = is_perm_r; 1435 } else { 1436 PetscInt *idxs_c,i; 1437 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1438 for (i=0;i<csize;i++) { 1439 idxs_c[idxs_perm_c[i]] = i; 1440 } 1441 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1442 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1443 } 1444 } else { 1445 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1446 } 1447 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1448 1449 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1450 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1451 work_mat[0] = new_mat; 1452 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1453 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1454 } 1455 1456 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1457 *B = work_mat[0]; 1458 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1459 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1460 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1466 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1467 { 1468 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1469 PC_IS* pcis = (PC_IS*)pc->data; 1470 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1471 Mat new_mat; 1472 IS is_local,is_global; 1473 PetscInt local_size; 1474 PetscBool isseqaij; 1475 PetscErrorCode ierr; 1476 1477 PetscFunctionBegin; 1478 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1479 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1480 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1481 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr); 1482 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1483 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1484 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1485 1486 /* check */ 1487 if (pcbddc->dbg_flag) { 1488 Vec x,x_change; 1489 PetscReal error; 1490 1491 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1492 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1493 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1494 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1495 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1497 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1498 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1499 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1500 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1501 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1502 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1503 ierr = VecDestroy(&x);CHKERRQ(ierr); 1504 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1505 } 1506 1507 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1508 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1509 if (isseqaij) { 1510 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1511 } else { 1512 Mat work_mat; 1513 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1514 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1515 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1516 } 1517 if (matis->A->symmetric_set) { 1518 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1519 #if !defined(PETSC_USE_COMPLEX) 1520 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1521 #endif 1522 } 1523 /* 1524 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1525 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1526 */ 1527 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1533 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1534 { 1535 PC_IS* pcis = (PC_IS*)(pc->data); 1536 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1537 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1538 PetscInt *idx_R_local=NULL; 1539 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1540 PetscInt vbs,bs; 1541 PetscBT bitmask=NULL; 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 /* 1546 No need to setup local scatters if 1547 - primal space is unchanged 1548 AND 1549 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1550 AND 1551 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1552 */ 1553 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1554 PetscFunctionReturn(0); 1555 } 1556 /* destroy old objects */ 1557 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1558 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1559 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1560 /* Set Non-overlapping dimensions */ 1561 n_B = pcis->n_B; 1562 n_D = pcis->n - n_B; 1563 n_vertices = pcbddc->n_vertices; 1564 1565 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1566 1567 /* create auxiliary bitmask and allocate workspace */ 1568 if (!sub_schurs->reuse_mumps) { 1569 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1570 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1571 for (i=0;i<n_vertices;i++) { 1572 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1573 } 1574 1575 for (i=0, n_R=0; i<pcis->n; i++) { 1576 if (!PetscBTLookup(bitmask,i)) { 1577 idx_R_local[n_R++] = i; 1578 } 1579 } 1580 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1581 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1582 1583 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1584 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1585 } 1586 1587 /* Block code */ 1588 vbs = 1; 1589 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1590 if (bs>1 && !(n_vertices%bs)) { 1591 PetscBool is_blocked = PETSC_TRUE; 1592 PetscInt *vary; 1593 if (!sub_schurs->reuse_mumps) { 1594 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1595 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1596 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1597 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1598 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1599 for (i=0; i<pcis->n/bs; i++) { 1600 if (vary[i]!=0 && vary[i]!=bs) { 1601 is_blocked = PETSC_FALSE; 1602 break; 1603 } 1604 } 1605 ierr = PetscFree(vary);CHKERRQ(ierr); 1606 } else { 1607 /* Verify directly the R set */ 1608 for (i=0; i<n_R/bs; i++) { 1609 PetscInt j,node=idx_R_local[bs*i]; 1610 for (j=1; j<bs; j++) { 1611 if (node != idx_R_local[bs*i+j]-j) { 1612 is_blocked = PETSC_FALSE; 1613 break; 1614 } 1615 } 1616 } 1617 } 1618 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1619 vbs = bs; 1620 for (i=0;i<n_R/vbs;i++) { 1621 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1622 } 1623 } 1624 } 1625 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1626 if (sub_schurs->reuse_mumps) { 1627 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1628 1629 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1630 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1631 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1632 reuse_mumps->is_R = pcbddc->is_R_local; 1633 } else { 1634 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1635 } 1636 1637 /* print some info if requested */ 1638 if (pcbddc->dbg_flag) { 1639 PetscInt benign = 0; 1640 1641 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1642 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1643 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1644 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1645 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1648 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1649 } 1650 1651 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1652 if (!sub_schurs->reuse_mumps) { 1653 IS is_aux1,is_aux2; 1654 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1655 1656 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1657 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1658 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1659 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1660 for (i=0; i<n_D; i++) { 1661 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1662 } 1663 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1664 for (i=0, j=0; i<n_R; i++) { 1665 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1666 aux_array1[j++] = i; 1667 } 1668 } 1669 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1670 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1671 for (i=0, j=0; i<n_B; i++) { 1672 if (!PetscBTLookup(bitmask,is_indices[i])) { 1673 aux_array2[j++] = i; 1674 } 1675 } 1676 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1677 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1678 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1679 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1680 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1681 1682 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1683 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1684 for (i=0, j=0; i<n_R; i++) { 1685 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1686 aux_array1[j++] = i; 1687 } 1688 } 1689 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1690 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1691 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1692 } 1693 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1694 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1695 } else { 1696 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1697 IS tis; 1698 PetscInt schur_size; 1699 1700 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1701 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1702 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1703 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1704 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1705 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1706 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1707 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1708 } 1709 } 1710 PetscFunctionReturn(0); 1711 } 1712 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1716 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1717 { 1718 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1719 PC_IS *pcis = (PC_IS*)pc->data; 1720 PC pc_temp; 1721 Mat A_RR; 1722 MatReuse reuse; 1723 PetscScalar m_one = -1.0; 1724 PetscReal value; 1725 PetscInt n_D,n_R; 1726 PetscBool use_exact,use_exact_reduced,issbaij; 1727 PetscErrorCode ierr; 1728 /* prefixes stuff */ 1729 char dir_prefix[256],neu_prefix[256],str_level[16]; 1730 size_t len; 1731 1732 PetscFunctionBegin; 1733 1734 /* compute prefixes */ 1735 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1736 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1737 if (!pcbddc->current_level) { 1738 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1739 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1740 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1741 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1742 } else { 1743 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1744 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1745 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1746 len -= 15; /* remove "pc_bddc_coarse_" */ 1747 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1748 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1749 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1750 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1751 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1752 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1753 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1754 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1755 } 1756 1757 /* DIRICHLET PROBLEM */ 1758 if (dirichlet) { 1759 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1760 if (pcbddc->local_mat->symmetric_set) { 1761 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1762 } 1763 /* Matrix for Dirichlet problem is pcis->A_II */ 1764 n_D = pcis->n - pcis->n_B; 1765 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1766 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1767 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1768 /* default */ 1769 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1770 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1771 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1772 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1773 if (issbaij) { 1774 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1775 } else { 1776 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1777 } 1778 /* Allow user's customization */ 1779 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1780 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1781 } 1782 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1783 if (sub_schurs->reuse_mumps) { 1784 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1785 1786 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1787 } 1788 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1789 if (!n_D) { 1790 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1791 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1792 } 1793 /* Set Up KSP for Dirichlet problem of BDDC */ 1794 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1795 /* set ksp_D into pcis data */ 1796 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1797 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1798 pcis->ksp_D = pcbddc->ksp_D; 1799 } 1800 1801 /* NEUMANN PROBLEM */ 1802 A_RR = 0; 1803 if (neumann) { 1804 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1805 PetscInt ibs,mbs; 1806 PetscBool issbaij; 1807 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1808 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1809 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1810 if (pcbddc->ksp_R) { /* already created ksp */ 1811 PetscInt nn_R; 1812 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1813 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1814 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1815 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1816 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1817 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1818 reuse = MAT_INITIAL_MATRIX; 1819 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1820 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1821 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1822 reuse = MAT_INITIAL_MATRIX; 1823 } else { /* safe to reuse the matrix */ 1824 reuse = MAT_REUSE_MATRIX; 1825 } 1826 } 1827 /* last check */ 1828 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1829 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1830 reuse = MAT_INITIAL_MATRIX; 1831 } 1832 } else { /* first time, so we need to create the matrix */ 1833 reuse = MAT_INITIAL_MATRIX; 1834 } 1835 /* extract A_RR */ 1836 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1837 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1838 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1839 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1840 if (matis->A == pcbddc->local_mat) { 1841 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1842 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1843 } else { 1844 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1845 } 1846 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1847 if (matis->A == pcbddc->local_mat) { 1848 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1849 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1850 } else { 1851 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1852 } 1853 } 1854 if (!sub_schurs->reuse_mumps) { 1855 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1856 if (pcbddc->local_mat->symmetric_set) { 1857 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1858 } 1859 } else { 1860 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1861 1862 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1863 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1864 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1865 } 1866 if (!pcbddc->ksp_R) { /* create object if not present */ 1867 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1868 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1869 /* default */ 1870 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1871 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1872 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1873 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1874 if (issbaij) { 1875 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1876 } else { 1877 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1878 } 1879 /* Allow user's customization */ 1880 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1881 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1882 } 1883 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1884 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1885 if (!n_R) { 1886 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1887 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1888 } 1889 /* Reuse MUMPS solver if it is present */ 1890 if (sub_schurs->reuse_mumps) { 1891 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1892 1893 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1894 } 1895 /* Set Up KSP for Neumann problem of BDDC */ 1896 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1897 } 1898 /* free Neumann problem's matrix */ 1899 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1900 1901 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1902 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1903 if (pcbddc->dbg_flag) { 1904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1905 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1906 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1907 } 1908 if (dirichlet) { /* Dirichlet */ 1909 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1910 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1911 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1912 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1913 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1914 /* need to be adapted? */ 1915 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1916 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1917 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1918 /* print info */ 1919 if (pcbddc->dbg_flag) { 1920 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1921 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1922 } 1923 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1924 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1925 } 1926 } 1927 if (neumann) { /* Neumann */ 1928 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1929 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1930 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1931 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1932 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1933 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1934 /* need to be adapted? */ 1935 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1936 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1937 /* print info */ 1938 if (pcbddc->dbg_flag) { 1939 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1940 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1941 } 1942 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1943 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1944 } 1945 } 1946 } 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1952 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1953 { 1954 PetscErrorCode ierr; 1955 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1956 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1957 1958 PetscFunctionBegin; 1959 if (!sub_schurs->reuse_mumps) { 1960 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1961 } 1962 if (!pcbddc->switch_static) { 1963 if (applytranspose && pcbddc->local_auxmat1) { 1964 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1965 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1966 } 1967 if (!sub_schurs->reuse_mumps) { 1968 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1969 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1970 } else { 1971 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1972 1973 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1974 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1975 } 1976 } else { 1977 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1978 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1979 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1980 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1981 if (applytranspose && pcbddc->local_auxmat1) { 1982 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1983 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1984 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1985 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1986 } 1987 } 1988 if (!sub_schurs->reuse_mumps) { 1989 if (applytranspose) { 1990 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1991 } else { 1992 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1993 } 1994 #if defined(PETSC_HAVE_MUMPS) 1995 } else { 1996 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1997 1998 if (applytranspose) { 1999 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2000 } else { 2001 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2002 } 2003 #endif 2004 } 2005 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2006 if (!pcbddc->switch_static) { 2007 if (!sub_schurs->reuse_mumps) { 2008 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2009 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2010 } else { 2011 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2012 2013 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2014 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2015 } 2016 if (!applytranspose && pcbddc->local_auxmat1) { 2017 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2018 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2019 } 2020 } else { 2021 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2022 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2023 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2024 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2025 if (!applytranspose && pcbddc->local_auxmat1) { 2026 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2027 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2028 } 2029 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2030 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2031 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2032 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2033 } 2034 PetscFunctionReturn(0); 2035 } 2036 2037 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2040 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2041 { 2042 PetscErrorCode ierr; 2043 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2044 PC_IS* pcis = (PC_IS*) (pc->data); 2045 const PetscScalar zero = 0.0; 2046 2047 PetscFunctionBegin; 2048 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2049 if (applytranspose) { 2050 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2051 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2052 } else { 2053 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2054 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2055 } 2056 2057 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2058 if (pcbddc->benign_p0_lidx >= 0) { 2059 PetscScalar *array; 2060 2061 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2062 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2063 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2064 } 2065 2066 /* start communications from local primal nodes to rhs of coarse solver */ 2067 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2068 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2069 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2070 2071 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2072 /* TODO remove null space when doing multilevel */ 2073 if (pcbddc->coarse_ksp) { 2074 Vec rhs,sol; 2075 2076 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2077 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2078 if (applytranspose) { 2079 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2080 } else { 2081 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2082 } 2083 } 2084 2085 /* Local solution on R nodes */ 2086 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2087 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2088 } 2089 2090 /* communications from coarse sol to local primal nodes */ 2091 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2092 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2093 2094 /* Sum contributions from two levels */ 2095 if (applytranspose) { 2096 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2097 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2098 } else { 2099 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2100 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2101 } 2102 /* store p0 */ 2103 if (pcbddc->benign_p0_lidx >= 0) { 2104 PetscScalar *array; 2105 2106 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2107 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2108 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2109 } 2110 PetscFunctionReturn(0); 2111 } 2112 2113 #undef __FUNCT__ 2114 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2115 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2116 { 2117 PetscErrorCode ierr; 2118 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2119 PetscScalar *array; 2120 Vec from,to; 2121 2122 PetscFunctionBegin; 2123 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2124 from = pcbddc->coarse_vec; 2125 to = pcbddc->vec1_P; 2126 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2127 Vec tvec; 2128 2129 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2130 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2131 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2132 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2133 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2134 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2135 } 2136 } else { /* from local to global -> put data in coarse right hand side */ 2137 from = pcbddc->vec1_P; 2138 to = pcbddc->coarse_vec; 2139 } 2140 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2146 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2147 { 2148 PetscErrorCode ierr; 2149 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2150 PetscScalar *array; 2151 Vec from,to; 2152 2153 PetscFunctionBegin; 2154 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2155 from = pcbddc->coarse_vec; 2156 to = pcbddc->vec1_P; 2157 } else { /* from local to global -> put data in coarse right hand side */ 2158 from = pcbddc->vec1_P; 2159 to = pcbddc->coarse_vec; 2160 } 2161 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2162 if (smode == SCATTER_FORWARD) { 2163 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2164 Vec tvec; 2165 2166 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2167 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2168 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2169 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2170 } 2171 } else { 2172 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2173 ierr = VecResetArray(from);CHKERRQ(ierr); 2174 } 2175 } 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* uncomment for testing purposes */ 2180 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2181 #undef __FUNCT__ 2182 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2183 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2184 { 2185 PetscErrorCode ierr; 2186 PC_IS* pcis = (PC_IS*)(pc->data); 2187 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2188 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2189 /* one and zero */ 2190 PetscScalar one=1.0,zero=0.0; 2191 /* space to store constraints and their local indices */ 2192 PetscScalar *constraints_data; 2193 PetscInt *constraints_idxs,*constraints_idxs_B; 2194 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2195 PetscInt *constraints_n; 2196 /* iterators */ 2197 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2198 /* BLAS integers */ 2199 PetscBLASInt lwork,lierr; 2200 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2201 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2202 /* reuse */ 2203 PetscInt olocal_primal_size,olocal_primal_size_cc; 2204 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2205 /* change of basis */ 2206 PetscBool qr_needed; 2207 PetscBT change_basis,qr_needed_idx; 2208 /* auxiliary stuff */ 2209 PetscInt *nnz,*is_indices; 2210 PetscInt ncc; 2211 /* some quantities */ 2212 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2213 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2214 2215 PetscFunctionBegin; 2216 /* Destroy Mat objects computed previously */ 2217 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2218 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2219 /* save info on constraints from previous setup (if any) */ 2220 olocal_primal_size = pcbddc->local_primal_size; 2221 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2222 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2223 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2224 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2225 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2226 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2227 2228 /* print some info */ 2229 if (pcbddc->dbg_flag) { 2230 IS vertices; 2231 PetscInt nv,nedges,nfaces; 2232 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2233 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2234 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2235 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2236 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2240 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2241 } 2242 2243 if (!pcbddc->adaptive_selection) { 2244 IS ISForVertices,*ISForFaces,*ISForEdges; 2245 MatNullSpace nearnullsp; 2246 const Vec *nearnullvecs; 2247 Vec *localnearnullsp; 2248 PetscScalar *array; 2249 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2250 PetscBool nnsp_has_cnst; 2251 /* LAPACK working arrays for SVD or POD */ 2252 PetscBool skip_lapack,boolforchange; 2253 PetscScalar *work; 2254 PetscReal *singular_vals; 2255 #if defined(PETSC_USE_COMPLEX) 2256 PetscReal *rwork; 2257 #endif 2258 #if defined(PETSC_MISSING_LAPACK_GESVD) 2259 PetscScalar *temp_basis,*correlation_mat; 2260 #else 2261 PetscBLASInt dummy_int=1; 2262 PetscScalar dummy_scalar=1.; 2263 #endif 2264 2265 /* Get index sets for faces, edges and vertices from graph */ 2266 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2267 /* free unneeded index sets */ 2268 if (!pcbddc->use_vertices) { 2269 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2270 } 2271 if (!pcbddc->use_edges) { 2272 for (i=0;i<n_ISForEdges;i++) { 2273 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2274 } 2275 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2276 n_ISForEdges = 0; 2277 } 2278 if (!pcbddc->use_faces) { 2279 for (i=0;i<n_ISForFaces;i++) { 2280 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2281 } 2282 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2283 n_ISForFaces = 0; 2284 } 2285 2286 #if defined(PETSC_USE_DEBUG) 2287 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2288 Also use_change_of_basis should be consistent among processors */ 2289 if (pcbddc->NullSpace) { 2290 PetscBool tbool[2],gbool[2]; 2291 2292 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2293 pcbddc->use_change_of_basis = PETSC_TRUE; 2294 if (!ISForEdges) { 2295 pcbddc->use_change_on_faces = PETSC_TRUE; 2296 } 2297 } 2298 tbool[0] = pcbddc->use_change_of_basis; 2299 tbool[1] = pcbddc->use_change_on_faces; 2300 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2301 pcbddc->use_change_of_basis = gbool[0]; 2302 pcbddc->use_change_on_faces = gbool[1]; 2303 } 2304 #endif 2305 2306 /* check if near null space is attached to global mat */ 2307 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2308 if (nearnullsp) { 2309 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2310 /* remove any stored info */ 2311 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2312 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2313 /* store information for BDDC solver reuse */ 2314 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2315 pcbddc->onearnullspace = nearnullsp; 2316 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2317 for (i=0;i<nnsp_size;i++) { 2318 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2319 } 2320 } else { /* if near null space is not provided BDDC uses constants by default */ 2321 nnsp_size = 0; 2322 nnsp_has_cnst = PETSC_TRUE; 2323 } 2324 /* get max number of constraints on a single cc */ 2325 max_constraints = nnsp_size; 2326 if (nnsp_has_cnst) max_constraints++; 2327 2328 /* 2329 Evaluate maximum storage size needed by the procedure 2330 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2331 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2332 There can be multiple constraints per connected component 2333 */ 2334 n_vertices = 0; 2335 if (ISForVertices) { 2336 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2337 } 2338 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2339 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2340 2341 total_counts = n_ISForFaces+n_ISForEdges; 2342 total_counts *= max_constraints; 2343 total_counts += n_vertices; 2344 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2345 2346 total_counts = 0; 2347 max_size_of_constraint = 0; 2348 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2349 IS used_is; 2350 if (i<n_ISForEdges) { 2351 used_is = ISForEdges[i]; 2352 } else { 2353 used_is = ISForFaces[i-n_ISForEdges]; 2354 } 2355 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2356 total_counts += j; 2357 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2358 } 2359 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2360 2361 /* get local part of global near null space vectors */ 2362 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2363 for (k=0;k<nnsp_size;k++) { 2364 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2365 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2366 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2367 } 2368 2369 /* whether or not to skip lapack calls */ 2370 skip_lapack = PETSC_TRUE; 2371 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2372 2373 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2374 if (!skip_lapack) { 2375 PetscScalar temp_work; 2376 2377 #if defined(PETSC_MISSING_LAPACK_GESVD) 2378 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2379 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2380 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2381 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2382 #if defined(PETSC_USE_COMPLEX) 2383 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2384 #endif 2385 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2386 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2387 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2388 lwork = -1; 2389 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2390 #if !defined(PETSC_USE_COMPLEX) 2391 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2392 #else 2393 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2394 #endif 2395 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2396 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2397 #else /* on missing GESVD */ 2398 /* SVD */ 2399 PetscInt max_n,min_n; 2400 max_n = max_size_of_constraint; 2401 min_n = max_constraints; 2402 if (max_size_of_constraint < max_constraints) { 2403 min_n = max_size_of_constraint; 2404 max_n = max_constraints; 2405 } 2406 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2407 #if defined(PETSC_USE_COMPLEX) 2408 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2409 #endif 2410 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2411 lwork = -1; 2412 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2413 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2414 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2415 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2416 #if !defined(PETSC_USE_COMPLEX) 2417 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2418 #else 2419 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2420 #endif 2421 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2422 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2423 #endif /* on missing GESVD */ 2424 /* Allocate optimal workspace */ 2425 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2426 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2427 } 2428 /* Now we can loop on constraining sets */ 2429 total_counts = 0; 2430 constraints_idxs_ptr[0] = 0; 2431 constraints_data_ptr[0] = 0; 2432 /* vertices */ 2433 if (n_vertices) { 2434 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2435 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2436 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2437 for (i=0;i<n_vertices;i++) { 2438 constraints_n[total_counts] = 1; 2439 constraints_data[total_counts] = 1.0; 2440 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2441 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2442 total_counts++; 2443 } 2444 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2445 PetscBool used_vertex; 2446 for (i=0;i<n_vertices;i++) { 2447 used_vertex = PETSC_FALSE; 2448 k = 0; 2449 while (!used_vertex && k<nnsp_size) { 2450 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2451 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2452 constraints_n[total_counts] = 1; 2453 constraints_idxs[total_counts] = is_indices[i]; 2454 constraints_data[total_counts] = 1.0; 2455 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2456 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2457 total_counts++; 2458 used_vertex = PETSC_TRUE; 2459 } 2460 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2461 k++; 2462 } 2463 } 2464 } 2465 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2466 n_vertices = total_counts; 2467 } 2468 2469 /* edges and faces */ 2470 total_counts_cc = total_counts; 2471 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2472 IS used_is; 2473 PetscBool idxs_copied = PETSC_FALSE; 2474 2475 if (ncc<n_ISForEdges) { 2476 used_is = ISForEdges[ncc]; 2477 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2478 } else { 2479 used_is = ISForFaces[ncc-n_ISForEdges]; 2480 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2481 } 2482 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2483 2484 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2485 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2486 /* change of basis should not be performed on local periodic nodes */ 2487 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2488 if (nnsp_has_cnst) { 2489 PetscScalar quad_value; 2490 2491 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2492 idxs_copied = PETSC_TRUE; 2493 2494 if (!pcbddc->use_nnsp_true) { 2495 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2496 } else { 2497 quad_value = 1.0; 2498 } 2499 for (j=0;j<size_of_constraint;j++) { 2500 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2501 } 2502 temp_constraints++; 2503 total_counts++; 2504 } 2505 for (k=0;k<nnsp_size;k++) { 2506 PetscReal real_value; 2507 PetscScalar *ptr_to_data; 2508 2509 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2510 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2511 for (j=0;j<size_of_constraint;j++) { 2512 ptr_to_data[j] = array[is_indices[j]]; 2513 } 2514 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2515 /* check if array is null on the connected component */ 2516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2517 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2518 if (real_value > 0.0) { /* keep indices and values */ 2519 temp_constraints++; 2520 total_counts++; 2521 if (!idxs_copied) { 2522 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2523 idxs_copied = PETSC_TRUE; 2524 } 2525 } 2526 } 2527 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2528 valid_constraints = temp_constraints; 2529 if (!pcbddc->use_nnsp_true && temp_constraints) { 2530 if (temp_constraints == 1) { /* just normalize the constraint */ 2531 PetscScalar norm,*ptr_to_data; 2532 2533 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2535 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2536 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2537 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2538 } else { /* perform SVD */ 2539 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2540 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2541 2542 #if defined(PETSC_MISSING_LAPACK_GESVD) 2543 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2544 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2545 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2546 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2547 from that computed using LAPACKgesvd 2548 -> This is due to a different computation of eigenvectors in LAPACKheev 2549 -> The quality of the POD-computed basis will be the same */ 2550 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2551 /* Store upper triangular part of correlation matrix */ 2552 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2553 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2554 for (j=0;j<temp_constraints;j++) { 2555 for (k=0;k<j+1;k++) { 2556 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2557 } 2558 } 2559 /* compute eigenvalues and eigenvectors of correlation matrix */ 2560 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2561 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2562 #if !defined(PETSC_USE_COMPLEX) 2563 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2564 #else 2565 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2566 #endif 2567 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2568 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2569 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2570 j = 0; 2571 while (j < temp_constraints && singular_vals[j] < tol) j++; 2572 total_counts = total_counts-j; 2573 valid_constraints = temp_constraints-j; 2574 /* scale and copy POD basis into used quadrature memory */ 2575 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2576 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2577 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2579 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2580 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2581 if (j<temp_constraints) { 2582 PetscInt ii; 2583 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2584 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2585 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2586 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2587 for (k=0;k<temp_constraints-j;k++) { 2588 for (ii=0;ii<size_of_constraint;ii++) { 2589 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2590 } 2591 } 2592 } 2593 #else /* on missing GESVD */ 2594 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2595 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2596 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2597 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2598 #if !defined(PETSC_USE_COMPLEX) 2599 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2600 #else 2601 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2602 #endif 2603 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2604 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2605 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2606 k = temp_constraints; 2607 if (k > size_of_constraint) k = size_of_constraint; 2608 j = 0; 2609 while (j < k && singular_vals[k-j-1] < tol) j++; 2610 valid_constraints = k-j; 2611 total_counts = total_counts-temp_constraints+valid_constraints; 2612 #endif /* on missing GESVD */ 2613 } 2614 } 2615 /* update pointers information */ 2616 if (valid_constraints) { 2617 constraints_n[total_counts_cc] = valid_constraints; 2618 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2619 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2620 /* set change_of_basis flag */ 2621 if (boolforchange) { 2622 PetscBTSet(change_basis,total_counts_cc); 2623 } 2624 total_counts_cc++; 2625 } 2626 } 2627 /* free workspace */ 2628 if (!skip_lapack) { 2629 ierr = PetscFree(work);CHKERRQ(ierr); 2630 #if defined(PETSC_USE_COMPLEX) 2631 ierr = PetscFree(rwork);CHKERRQ(ierr); 2632 #endif 2633 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2634 #if defined(PETSC_MISSING_LAPACK_GESVD) 2635 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2636 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2637 #endif 2638 } 2639 for (k=0;k<nnsp_size;k++) { 2640 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2641 } 2642 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2643 /* free index sets of faces, edges and vertices */ 2644 for (i=0;i<n_ISForFaces;i++) { 2645 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2646 } 2647 if (n_ISForFaces) { 2648 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2649 } 2650 for (i=0;i<n_ISForEdges;i++) { 2651 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2652 } 2653 if (n_ISForEdges) { 2654 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2655 } 2656 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2657 } else { 2658 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2659 2660 total_counts = 0; 2661 n_vertices = 0; 2662 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2663 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2664 } 2665 max_constraints = 0; 2666 total_counts_cc = 0; 2667 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2668 total_counts += pcbddc->adaptive_constraints_n[i]; 2669 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2670 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2671 } 2672 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2673 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2674 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2675 constraints_data = pcbddc->adaptive_constraints_data; 2676 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2677 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2678 total_counts_cc = 0; 2679 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2680 if (pcbddc->adaptive_constraints_n[i]) { 2681 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2682 } 2683 } 2684 #if 0 2685 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2686 for (i=0;i<total_counts_cc;i++) { 2687 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2688 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2689 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2690 printf(" %d",constraints_idxs[j]); 2691 } 2692 printf("\n"); 2693 printf("number of cc: %d\n",constraints_n[i]); 2694 } 2695 for (i=0;i<n_vertices;i++) { 2696 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2697 } 2698 for (i=0;i<sub_schurs->n_subs;i++) { 2699 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2700 } 2701 #endif 2702 2703 max_size_of_constraint = 0; 2704 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2705 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2706 /* Change of basis */ 2707 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2708 if (pcbddc->use_change_of_basis) { 2709 for (i=0;i<sub_schurs->n_subs;i++) { 2710 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2711 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2712 } 2713 } 2714 } 2715 } 2716 pcbddc->local_primal_size = total_counts; 2717 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2718 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2719 2720 /* map constraints_idxs in boundary numbering */ 2721 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2722 if (i != constraints_idxs_ptr[total_counts_cc]) { 2723 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2724 } 2725 2726 /* Create constraint matrix */ 2727 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2728 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2729 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2730 2731 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2732 /* determine if a QR strategy is needed for change of basis */ 2733 qr_needed = PETSC_FALSE; 2734 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2735 total_primal_vertices=0; 2736 pcbddc->local_primal_size_cc = 0; 2737 for (i=0;i<total_counts_cc;i++) { 2738 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2739 if (size_of_constraint == 1) { 2740 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2741 pcbddc->local_primal_size_cc += 1; 2742 } else if (PetscBTLookup(change_basis,i)) { 2743 for (k=0;k<constraints_n[i];k++) { 2744 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2745 } 2746 pcbddc->local_primal_size_cc += constraints_n[i]; 2747 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2748 PetscBTSet(qr_needed_idx,i); 2749 qr_needed = PETSC_TRUE; 2750 } 2751 } else { 2752 pcbddc->local_primal_size_cc += 1; 2753 } 2754 } 2755 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2756 pcbddc->n_vertices = total_primal_vertices; 2757 /* permute indices in order to have a sorted set of vertices */ 2758 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2759 2760 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2761 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2762 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2763 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2764 2765 /* nonzero structure of constraint matrix */ 2766 /* and get reference dof for local constraints */ 2767 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2768 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2769 2770 j = total_primal_vertices; 2771 total_counts = total_primal_vertices; 2772 cum = total_primal_vertices; 2773 for (i=n_vertices;i<total_counts_cc;i++) { 2774 if (!PetscBTLookup(change_basis,i)) { 2775 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2776 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2777 cum++; 2778 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2779 for (k=0;k<constraints_n[i];k++) { 2780 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2781 nnz[j+k] = size_of_constraint; 2782 } 2783 j += constraints_n[i]; 2784 } 2785 } 2786 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2787 ierr = PetscFree(nnz);CHKERRQ(ierr); 2788 2789 /* set values in constraint matrix */ 2790 for (i=0;i<total_primal_vertices;i++) { 2791 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2792 } 2793 total_counts = total_primal_vertices; 2794 for (i=n_vertices;i<total_counts_cc;i++) { 2795 if (!PetscBTLookup(change_basis,i)) { 2796 PetscInt *cols; 2797 2798 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2799 cols = constraints_idxs+constraints_idxs_ptr[i]; 2800 for (k=0;k<constraints_n[i];k++) { 2801 PetscInt row = total_counts+k; 2802 PetscScalar *vals; 2803 2804 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2805 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2806 } 2807 total_counts += constraints_n[i]; 2808 } 2809 } 2810 /* assembling */ 2811 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2812 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2813 2814 /* 2815 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2816 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2817 */ 2818 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2819 if (pcbddc->use_change_of_basis) { 2820 /* dual and primal dofs on a single cc */ 2821 PetscInt dual_dofs,primal_dofs; 2822 /* working stuff for GEQRF */ 2823 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2824 PetscBLASInt lqr_work; 2825 /* working stuff for UNGQR */ 2826 PetscScalar *gqr_work,lgqr_work_t; 2827 PetscBLASInt lgqr_work; 2828 /* working stuff for TRTRS */ 2829 PetscScalar *trs_rhs; 2830 PetscBLASInt Blas_NRHS; 2831 /* pointers for values insertion into change of basis matrix */ 2832 PetscInt *start_rows,*start_cols; 2833 PetscScalar *start_vals; 2834 /* working stuff for values insertion */ 2835 PetscBT is_primal; 2836 PetscInt *aux_primal_numbering_B; 2837 /* matrix sizes */ 2838 PetscInt global_size,local_size; 2839 /* temporary change of basis */ 2840 Mat localChangeOfBasisMatrix; 2841 /* extra space for debugging */ 2842 PetscScalar *dbg_work; 2843 2844 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2845 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2846 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2847 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2848 /* nonzeros for local mat */ 2849 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2850 for (i=0;i<pcis->n;i++) nnz[i]=1; 2851 for (i=n_vertices;i<total_counts_cc;i++) { 2852 if (PetscBTLookup(change_basis,i)) { 2853 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2854 if (PetscBTLookup(qr_needed_idx,i)) { 2855 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2856 } else { 2857 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2858 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2859 } 2860 } 2861 } 2862 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2863 ierr = PetscFree(nnz);CHKERRQ(ierr); 2864 /* Set initial identity in the matrix */ 2865 for (i=0;i<pcis->n;i++) { 2866 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2867 } 2868 2869 if (pcbddc->dbg_flag) { 2870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2872 } 2873 2874 2875 /* Now we loop on the constraints which need a change of basis */ 2876 /* 2877 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2878 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2879 2880 Basic blocks of change of basis matrix T computed by 2881 2882 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2883 2884 | 1 0 ... 0 s_1/S | 2885 | 0 1 ... 0 s_2/S | 2886 | ... | 2887 | 0 ... 1 s_{n-1}/S | 2888 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2889 2890 with S = \sum_{i=1}^n s_i^2 2891 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2892 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2893 2894 - QR decomposition of constraints otherwise 2895 */ 2896 if (qr_needed) { 2897 /* space to store Q */ 2898 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2899 /* first we issue queries for optimal work */ 2900 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2901 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2902 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2903 lqr_work = -1; 2904 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2905 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2906 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2907 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2908 lgqr_work = -1; 2909 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2910 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2911 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2912 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2913 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2914 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2915 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2916 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2917 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2918 /* array to store scaling factors for reflectors */ 2919 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2920 /* array to store rhs and solution of triangular solver */ 2921 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2922 /* allocating workspace for check */ 2923 if (pcbddc->dbg_flag) { 2924 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2925 } 2926 } 2927 /* array to store whether a node is primal or not */ 2928 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2929 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2930 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2931 if (i != total_primal_vertices) { 2932 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2933 } 2934 for (i=0;i<total_primal_vertices;i++) { 2935 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2936 } 2937 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2938 2939 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2940 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2941 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2942 if (PetscBTLookup(change_basis,total_counts)) { 2943 /* get constraint info */ 2944 primal_dofs = constraints_n[total_counts]; 2945 dual_dofs = size_of_constraint-primal_dofs; 2946 2947 if (pcbddc->dbg_flag) { 2948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2949 } 2950 2951 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2952 2953 /* copy quadrature constraints for change of basis check */ 2954 if (pcbddc->dbg_flag) { 2955 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2956 } 2957 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2958 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2959 2960 /* compute QR decomposition of constraints */ 2961 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2962 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2963 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2964 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2965 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2966 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2967 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2968 2969 /* explictly compute R^-T */ 2970 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2971 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2972 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2973 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2975 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2976 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2977 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2978 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2979 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2980 2981 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2982 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2984 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2985 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2986 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2987 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2988 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2989 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2990 2991 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2992 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2993 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2994 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2995 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2996 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2997 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2998 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2999 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3000 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3001 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3002 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3003 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3004 3005 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3006 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3007 /* insert cols for primal dofs */ 3008 for (j=0;j<primal_dofs;j++) { 3009 start_vals = &qr_basis[j*size_of_constraint]; 3010 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3011 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3012 } 3013 /* insert cols for dual dofs */ 3014 for (j=0,k=0;j<dual_dofs;k++) { 3015 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3016 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3017 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3018 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3019 j++; 3020 } 3021 } 3022 3023 /* check change of basis */ 3024 if (pcbddc->dbg_flag) { 3025 PetscInt ii,jj; 3026 PetscBool valid_qr=PETSC_TRUE; 3027 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3028 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3029 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3030 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3032 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3033 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3034 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3035 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3036 for (jj=0;jj<size_of_constraint;jj++) { 3037 for (ii=0;ii<primal_dofs;ii++) { 3038 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3039 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3040 } 3041 } 3042 if (!valid_qr) { 3043 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3044 for (jj=0;jj<size_of_constraint;jj++) { 3045 for (ii=0;ii<primal_dofs;ii++) { 3046 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3047 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3048 } 3049 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3050 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3051 } 3052 } 3053 } 3054 } else { 3055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3056 } 3057 } 3058 } else { /* simple transformation block */ 3059 PetscInt row,col; 3060 PetscScalar val,norm; 3061 3062 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3063 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3064 for (j=0;j<size_of_constraint;j++) { 3065 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3066 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3067 if (!PetscBTLookup(is_primal,row_B)) { 3068 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3069 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3070 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3071 } else { 3072 for (k=0;k<size_of_constraint;k++) { 3073 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3074 if (row != col) { 3075 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3076 } else { 3077 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3078 } 3079 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3080 } 3081 } 3082 } 3083 if (pcbddc->dbg_flag) { 3084 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3085 } 3086 } 3087 } else { 3088 if (pcbddc->dbg_flag) { 3089 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3090 } 3091 } 3092 } 3093 3094 /* free workspace */ 3095 if (qr_needed) { 3096 if (pcbddc->dbg_flag) { 3097 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3098 } 3099 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3100 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3101 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3102 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3103 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3104 } 3105 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3106 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3107 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3108 3109 /* assembling of global change of variable */ 3110 { 3111 Mat tmat; 3112 PetscInt bs; 3113 3114 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3115 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3116 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3117 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3118 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3119 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3120 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3121 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3122 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3123 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3124 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3125 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3126 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3127 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3128 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3129 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3130 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3131 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3132 } 3133 /* check */ 3134 if (pcbddc->dbg_flag) { 3135 PetscReal error; 3136 Vec x,x_change; 3137 3138 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3139 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3140 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3141 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3142 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3143 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3144 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3145 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3146 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3147 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3148 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3149 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3150 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3152 ierr = VecDestroy(&x);CHKERRQ(ierr); 3153 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3154 } 3155 3156 /* adapt sub_schurs computed (if any) */ 3157 if (pcbddc->use_deluxe_scaling) { 3158 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3159 if (sub_schurs->S_Ej_all) { 3160 Mat S_new,tmat; 3161 IS is_all_N; 3162 3163 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3164 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3165 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3166 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3167 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3168 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3169 sub_schurs->S_Ej_all = S_new; 3170 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3171 if (sub_schurs->sum_S_Ej_all) { 3172 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3173 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3174 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3175 sub_schurs->sum_S_Ej_all = S_new; 3176 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3177 } 3178 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3179 } 3180 } 3181 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3182 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3183 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3184 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3185 } 3186 3187 /* set up change of basis context */ 3188 if (pcbddc->ChangeOfBasisMatrix) { 3189 PCBDDCChange_ctx change_ctx; 3190 3191 if (!pcbddc->new_global_mat) { 3192 PetscInt global_size,local_size; 3193 3194 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3195 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3196 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3197 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3198 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3199 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3200 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3201 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3202 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3203 } else { 3204 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3205 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3206 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3207 } 3208 if (!pcbddc->user_ChangeOfBasisMatrix) { 3209 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3210 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3211 } else { 3212 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3213 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3214 } 3215 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3216 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3217 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3218 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3219 } 3220 3221 /* add pressure dof to set of primal nodes for numbering purposes */ 3222 if (pcbddc->benign_p0_lidx >= 0) { 3223 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3224 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3225 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3226 pcbddc->local_primal_size_cc++; 3227 pcbddc->local_primal_size++; 3228 } 3229 3230 /* check if a new primal space has been introduced (also take into account benign trick) */ 3231 pcbddc->new_primal_space_local = PETSC_TRUE; 3232 if (olocal_primal_size == pcbddc->local_primal_size) { 3233 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3234 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3235 if (!pcbddc->new_primal_space_local) { 3236 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3237 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3238 } 3239 } 3240 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3241 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3242 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3243 3244 /* flush dbg viewer */ 3245 if (pcbddc->dbg_flag) { 3246 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3247 } 3248 3249 /* free workspace */ 3250 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3251 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3252 if (!pcbddc->adaptive_selection) { 3253 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3254 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3255 } else { 3256 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3257 pcbddc->adaptive_constraints_idxs_ptr, 3258 pcbddc->adaptive_constraints_data_ptr, 3259 pcbddc->adaptive_constraints_idxs, 3260 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3261 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3262 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3263 } 3264 PetscFunctionReturn(0); 3265 } 3266 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3269 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3270 { 3271 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3272 PC_IS *pcis = (PC_IS*)pc->data; 3273 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3274 PetscInt ierr,i,vertex_size,N; 3275 PetscViewer viewer=pcbddc->dbg_viewer; 3276 3277 PetscFunctionBegin; 3278 /* Reset previously computed graph */ 3279 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3280 /* Init local Graph struct */ 3281 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3282 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3283 3284 /* Check validity of the csr graph passed in by the user */ 3285 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3286 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3287 } 3288 3289 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3290 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3291 PetscInt *xadj,*adjncy; 3292 PetscInt nvtxs; 3293 PetscBool flg_row=PETSC_FALSE; 3294 3295 if (pcbddc->use_local_adj) { 3296 3297 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3298 if (flg_row) { 3299 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3300 pcbddc->computed_rowadj = PETSC_TRUE; 3301 } 3302 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3303 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3304 IS is_dummy; 3305 ISLocalToGlobalMapping l2gmap_dummy; 3306 PetscInt j,sum; 3307 PetscInt *cxadj,*cadjncy; 3308 const PetscInt *idxs; 3309 PCBDDCGraph graph; 3310 PetscBT is_on_boundary; 3311 3312 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3313 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3314 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3315 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3316 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3317 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3318 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3319 if (flg_row) { 3320 graph->xadj = xadj; 3321 graph->adjncy = adjncy; 3322 } 3323 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3324 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3325 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3326 3327 if (pcbddc->dbg_flag) { 3328 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3329 for (i=0;i<graph->ncc;i++) { 3330 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3331 } 3332 } 3333 3334 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3335 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3336 for (i=0;i<pcis->n_B;i++) { 3337 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3338 } 3339 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3340 3341 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3342 sum = 0; 3343 for (i=0;i<graph->ncc;i++) { 3344 PetscInt sizecc = 0; 3345 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3346 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3347 sizecc++; 3348 } 3349 } 3350 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3351 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3352 cxadj[graph->queue[j]] = sizecc; 3353 } 3354 } 3355 sum += sizecc*sizecc; 3356 } 3357 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3358 sum = 0; 3359 for (i=0;i<pcis->n;i++) { 3360 PetscInt temp = cxadj[i]; 3361 cxadj[i] = sum; 3362 sum += temp; 3363 } 3364 cxadj[pcis->n] = sum; 3365 for (i=0;i<graph->ncc;i++) { 3366 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3367 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3368 PetscInt k,sizecc = 0; 3369 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3370 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3371 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3372 sizecc++; 3373 } 3374 } 3375 } 3376 } 3377 } 3378 if (sum) { 3379 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3380 } else { 3381 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3382 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3383 } 3384 graph->xadj = 0; 3385 graph->adjncy = 0; 3386 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3387 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3388 } 3389 } 3390 if (pcbddc->dbg_flag) { 3391 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3392 } 3393 3394 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3395 vertex_size = 1; 3396 if (pcbddc->user_provided_isfordofs) { 3397 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3398 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3399 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3400 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3401 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3402 } 3403 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3404 pcbddc->n_ISForDofs = 0; 3405 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3406 } 3407 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3408 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3409 } else { 3410 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3411 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3412 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3413 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3414 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3415 } 3416 } 3417 } 3418 3419 /* Setup of Graph */ 3420 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3421 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3422 } 3423 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3424 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3425 } 3426 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3427 3428 /* Graph's connected components analysis */ 3429 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3430 3431 /* print some info to stdout */ 3432 if (pcbddc->dbg_flag) { 3433 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3434 } 3435 3436 /* mark topography has done */ 3437 pcbddc->recompute_topography = PETSC_FALSE; 3438 PetscFunctionReturn(0); 3439 } 3440 3441 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3442 #undef __FUNCT__ 3443 #define __FUNCT__ "PCBDDCSubsetNumbering" 3444 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3445 { 3446 PetscSF sf; 3447 PetscLayout map; 3448 const PetscInt *idxs; 3449 PetscInt *leaf_data,*root_data,*gidxs; 3450 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3451 PetscInt n_n,nlocals,start,first_index; 3452 PetscMPIInt commsize; 3453 PetscBool first_found; 3454 PetscErrorCode ierr; 3455 3456 PetscFunctionBegin; 3457 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3458 if (subset_mult) { 3459 PetscCheckSameComm(subset,1,subset_mult,2); 3460 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3461 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3462 } 3463 /* create workspace layout for computing global indices of subset */ 3464 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3465 lbounds[0] = lbounds[1] = 0; 3466 for (i=0;i<n;i++) { 3467 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3468 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3469 } 3470 lbounds[0] = -lbounds[0]; 3471 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3472 gbounds[0] = -gbounds[0]; 3473 N = gbounds[1] - gbounds[0] + 1; 3474 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3475 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3476 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3477 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3478 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3479 3480 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3481 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3482 if (subset_mult) { 3483 const PetscInt* idxs_mult; 3484 3485 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3486 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3487 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3488 } else { 3489 for (i=0;i<n;i++) leaf_data[i] = 1; 3490 } 3491 /* local size of new subset */ 3492 n_n = 0; 3493 for (i=0;i<n;i++) n_n += leaf_data[i]; 3494 3495 /* global indexes in layout */ 3496 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3497 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3498 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3499 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3500 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3501 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3502 3503 /* reduce from leaves to roots */ 3504 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3505 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3506 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3507 3508 /* count indexes in local part of layout */ 3509 nlocals = 0; 3510 first_index = -1; 3511 first_found = PETSC_FALSE; 3512 for (i=0;i<Nl;i++) { 3513 if (!first_found && root_data[i]) { 3514 first_found = PETSC_TRUE; 3515 first_index = i; 3516 } 3517 nlocals += root_data[i]; 3518 } 3519 3520 /* cumulative of number of indexes and size of subset without holes */ 3521 #if defined(PETSC_HAVE_MPI_EXSCAN) 3522 start = 0; 3523 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3524 #else 3525 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3526 start = start-nlocals; 3527 #endif 3528 3529 if (N_n) { /* compute total size of new subset if requested */ 3530 *N_n = start + nlocals; 3531 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3532 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3533 } 3534 3535 /* adapt root data with cumulative */ 3536 if (first_found) { 3537 PetscInt old_index; 3538 3539 root_data[first_index] += start; 3540 old_index = first_index; 3541 for (i=first_index+1;i<Nl;i++) { 3542 if (root_data[i]) { 3543 root_data[i] += root_data[old_index]; 3544 old_index = i; 3545 } 3546 } 3547 } 3548 3549 /* from roots to leaves */ 3550 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3551 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3552 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3553 3554 /* create new IS with global indexes without holes */ 3555 if (subset_mult) { 3556 const PetscInt* idxs_mult; 3557 PetscInt cum; 3558 3559 cum = 0; 3560 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3561 for (i=0;i<n;i++) { 3562 PetscInt j; 3563 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3564 } 3565 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3566 } else { 3567 for (i=0;i<n;i++) { 3568 gidxs[i] = leaf_data[i]-1; 3569 } 3570 } 3571 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3572 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3573 PetscFunctionReturn(0); 3574 } 3575 3576 #undef __FUNCT__ 3577 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3578 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3579 { 3580 PetscInt i,j; 3581 PetscScalar *alphas; 3582 PetscErrorCode ierr; 3583 3584 PetscFunctionBegin; 3585 /* this implements stabilized Gram-Schmidt */ 3586 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3587 for (i=0;i<n;i++) { 3588 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3589 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3590 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3591 } 3592 ierr = PetscFree(alphas);CHKERRQ(ierr); 3593 PetscFunctionReturn(0); 3594 } 3595 3596 #undef __FUNCT__ 3597 #define __FUNCT__ "MatISGetSubassemblingPattern" 3598 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3599 { 3600 IS ranks_send_to; 3601 PetscInt n_neighs,*neighs,*n_shared,**shared; 3602 PetscMPIInt size,rank,color; 3603 PetscInt *xadj,*adjncy; 3604 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3605 PetscInt i,local_size,threshold=0; 3606 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3607 PetscSubcomm subcomm; 3608 PetscErrorCode ierr; 3609 3610 PetscFunctionBegin; 3611 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3612 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3613 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3614 3615 /* Get info on mapping */ 3616 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3617 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3618 3619 /* build local CSR graph of subdomains' connectivity */ 3620 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3621 xadj[0] = 0; 3622 xadj[1] = PetscMax(n_neighs-1,0); 3623 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3624 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3625 3626 if (threshold) { 3627 PetscInt xadj_count = 0; 3628 for (i=1;i<n_neighs;i++) { 3629 if (n_shared[i] > threshold) { 3630 adjncy[xadj_count] = neighs[i]; 3631 adjncy_wgt[xadj_count] = n_shared[i]; 3632 xadj_count++; 3633 } 3634 } 3635 xadj[1] = xadj_count; 3636 } else { 3637 if (xadj[1]) { 3638 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3639 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3640 } 3641 } 3642 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3643 if (use_square) { 3644 for (i=0;i<xadj[1];i++) { 3645 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3646 } 3647 } 3648 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3649 3650 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3651 3652 /* 3653 Restrict work on active processes only. 3654 */ 3655 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3656 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3657 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3658 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3659 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3660 if (color) { 3661 ierr = PetscFree(xadj);CHKERRQ(ierr); 3662 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3663 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3664 } else { 3665 Mat subdomain_adj; 3666 IS new_ranks,new_ranks_contig; 3667 MatPartitioning partitioner; 3668 PetscInt prank,rstart=0,rend=0; 3669 PetscInt *is_indices,*oldranks; 3670 PetscBool aggregate; 3671 3672 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3673 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3674 prank = rank; 3675 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3676 /* 3677 for (i=0;i<size;i++) { 3678 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3679 } 3680 */ 3681 for (i=0;i<xadj[1];i++) { 3682 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3683 } 3684 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3685 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3686 if (aggregate) { 3687 PetscInt lrows,row,ncols,*cols; 3688 PetscMPIInt nrank; 3689 PetscScalar *vals; 3690 3691 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3692 lrows = 0; 3693 if (nrank<redprocs) { 3694 lrows = size/redprocs; 3695 if (nrank<size%redprocs) lrows++; 3696 } 3697 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3698 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3699 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3700 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3701 row = nrank; 3702 ncols = xadj[1]-xadj[0]; 3703 cols = adjncy; 3704 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3705 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3706 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3707 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3708 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3709 ierr = PetscFree(xadj);CHKERRQ(ierr); 3710 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3711 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3712 ierr = PetscFree(vals);CHKERRQ(ierr); 3713 } else { 3714 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3715 } 3716 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3717 3718 /* Partition */ 3719 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3720 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3721 if (use_vwgt) { 3722 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3723 v_wgt[0] = local_size; 3724 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3725 } 3726 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3727 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3728 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3729 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3730 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3731 3732 /* renumber new_ranks to avoid "holes" in new set of processors */ 3733 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3734 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3735 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3736 if (!redprocs) { 3737 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3738 } else { 3739 PetscInt idxs[1]; 3740 PetscMPIInt tag; 3741 MPI_Request *reqs; 3742 3743 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3744 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3745 for (i=rstart;i<rend;i++) { 3746 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3747 } 3748 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3749 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3750 ierr = PetscFree(reqs);CHKERRQ(ierr); 3751 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3752 } 3753 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3754 /* clean up */ 3755 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3756 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3757 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3758 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3759 } 3760 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3761 3762 /* assemble parallel IS for sends */ 3763 i = 1; 3764 if (color) i=0; 3765 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3766 /* get back IS */ 3767 *is_sends = ranks_send_to; 3768 PetscFunctionReturn(0); 3769 } 3770 3771 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3772 3773 #undef __FUNCT__ 3774 #define __FUNCT__ "MatISSubassemble" 3775 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3776 { 3777 Mat local_mat; 3778 IS is_sends_internal; 3779 PetscInt rows,cols,new_local_rows; 3780 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3781 PetscBool ismatis,isdense,newisdense,destroy_mat; 3782 ISLocalToGlobalMapping l2gmap; 3783 PetscInt* l2gmap_indices; 3784 const PetscInt* is_indices; 3785 MatType new_local_type; 3786 /* buffers */ 3787 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3788 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3789 PetscInt *recv_buffer_idxs_local; 3790 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3791 /* MPI */ 3792 MPI_Comm comm,comm_n; 3793 PetscSubcomm subcomm; 3794 PetscMPIInt n_sends,n_recvs,commsize; 3795 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3796 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3797 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3798 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3799 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3800 PetscErrorCode ierr; 3801 3802 PetscFunctionBegin; 3803 /* TODO: add missing checks */ 3804 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3805 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3806 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3807 PetscValidLogicalCollectiveInt(mat,nis,7); 3808 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3809 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3810 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3811 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3812 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3813 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3814 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3815 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3816 PetscInt mrows,mcols,mnrows,mncols; 3817 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3818 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3819 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3820 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3821 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3822 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3823 } 3824 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3825 PetscValidLogicalCollectiveInt(mat,bs,0); 3826 /* prepare IS for sending if not provided */ 3827 if (!is_sends) { 3828 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3829 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3830 } else { 3831 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3832 is_sends_internal = is_sends; 3833 } 3834 3835 /* get comm */ 3836 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3837 3838 /* compute number of sends */ 3839 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3840 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3841 3842 /* compute number of receives */ 3843 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3844 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3845 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3846 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3847 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3848 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3849 ierr = PetscFree(iflags);CHKERRQ(ierr); 3850 3851 /* restrict comm if requested */ 3852 subcomm = 0; 3853 destroy_mat = PETSC_FALSE; 3854 if (restrict_comm) { 3855 PetscMPIInt color,subcommsize; 3856 3857 color = 0; 3858 if (restrict_full) { 3859 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3860 } else { 3861 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3862 } 3863 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3864 subcommsize = commsize - subcommsize; 3865 /* check if reuse has been requested */ 3866 if (reuse == MAT_REUSE_MATRIX) { 3867 if (*mat_n) { 3868 PetscMPIInt subcommsize2; 3869 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3870 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3871 comm_n = PetscObjectComm((PetscObject)*mat_n); 3872 } else { 3873 comm_n = PETSC_COMM_SELF; 3874 } 3875 } else { /* MAT_INITIAL_MATRIX */ 3876 PetscMPIInt rank; 3877 3878 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3879 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3880 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3881 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3882 comm_n = PetscSubcommChild(subcomm); 3883 } 3884 /* flag to destroy *mat_n if not significative */ 3885 if (color) destroy_mat = PETSC_TRUE; 3886 } else { 3887 comm_n = comm; 3888 } 3889 3890 /* prepare send/receive buffers */ 3891 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3892 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3893 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3894 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3895 if (nis) { 3896 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3897 } 3898 3899 /* Get data from local matrices */ 3900 if (!isdense) { 3901 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3902 /* TODO: See below some guidelines on how to prepare the local buffers */ 3903 /* 3904 send_buffer_vals should contain the raw values of the local matrix 3905 send_buffer_idxs should contain: 3906 - MatType_PRIVATE type 3907 - PetscInt size_of_l2gmap 3908 - PetscInt global_row_indices[size_of_l2gmap] 3909 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3910 */ 3911 } else { 3912 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3913 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3914 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3915 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3916 send_buffer_idxs[1] = i; 3917 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3918 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3919 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3920 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3921 for (i=0;i<n_sends;i++) { 3922 ilengths_vals[is_indices[i]] = len*len; 3923 ilengths_idxs[is_indices[i]] = len+2; 3924 } 3925 } 3926 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3927 /* additional is (if any) */ 3928 if (nis) { 3929 PetscMPIInt psum; 3930 PetscInt j; 3931 for (j=0,psum=0;j<nis;j++) { 3932 PetscInt plen; 3933 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3934 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3935 psum += len+1; /* indices + lenght */ 3936 } 3937 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3938 for (j=0,psum=0;j<nis;j++) { 3939 PetscInt plen; 3940 const PetscInt *is_array_idxs; 3941 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3942 send_buffer_idxs_is[psum] = plen; 3943 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3944 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3945 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3946 psum += plen+1; /* indices + lenght */ 3947 } 3948 for (i=0;i<n_sends;i++) { 3949 ilengths_idxs_is[is_indices[i]] = psum; 3950 } 3951 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3952 } 3953 3954 buf_size_idxs = 0; 3955 buf_size_vals = 0; 3956 buf_size_idxs_is = 0; 3957 for (i=0;i<n_recvs;i++) { 3958 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3959 buf_size_vals += (PetscInt)olengths_vals[i]; 3960 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3961 } 3962 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3963 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3964 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3965 3966 /* get new tags for clean communications */ 3967 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3968 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3969 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3970 3971 /* allocate for requests */ 3972 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3973 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3974 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3975 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3976 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3977 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3978 3979 /* communications */ 3980 ptr_idxs = recv_buffer_idxs; 3981 ptr_vals = recv_buffer_vals; 3982 ptr_idxs_is = recv_buffer_idxs_is; 3983 for (i=0;i<n_recvs;i++) { 3984 source_dest = onodes[i]; 3985 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3986 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3987 ptr_idxs += olengths_idxs[i]; 3988 ptr_vals += olengths_vals[i]; 3989 if (nis) { 3990 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3991 ptr_idxs_is += olengths_idxs_is[i]; 3992 } 3993 } 3994 for (i=0;i<n_sends;i++) { 3995 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3996 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3997 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3998 if (nis) { 3999 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4000 } 4001 } 4002 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4003 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4004 4005 /* assemble new l2g map */ 4006 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4007 ptr_idxs = recv_buffer_idxs; 4008 new_local_rows = 0; 4009 for (i=0;i<n_recvs;i++) { 4010 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4011 ptr_idxs += olengths_idxs[i]; 4012 } 4013 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4014 ptr_idxs = recv_buffer_idxs; 4015 new_local_rows = 0; 4016 for (i=0;i<n_recvs;i++) { 4017 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4018 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4019 ptr_idxs += olengths_idxs[i]; 4020 } 4021 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4022 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4023 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4024 4025 /* infer new local matrix type from received local matrices type */ 4026 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4027 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4028 if (n_recvs) { 4029 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4030 ptr_idxs = recv_buffer_idxs; 4031 for (i=0;i<n_recvs;i++) { 4032 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4033 new_local_type_private = MATAIJ_PRIVATE; 4034 break; 4035 } 4036 ptr_idxs += olengths_idxs[i]; 4037 } 4038 switch (new_local_type_private) { 4039 case MATDENSE_PRIVATE: 4040 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4041 new_local_type = MATSEQAIJ; 4042 bs = 1; 4043 } else { /* if I receive only 1 dense matrix */ 4044 new_local_type = MATSEQDENSE; 4045 bs = 1; 4046 } 4047 break; 4048 case MATAIJ_PRIVATE: 4049 new_local_type = MATSEQAIJ; 4050 bs = 1; 4051 break; 4052 case MATBAIJ_PRIVATE: 4053 new_local_type = MATSEQBAIJ; 4054 break; 4055 case MATSBAIJ_PRIVATE: 4056 new_local_type = MATSEQSBAIJ; 4057 break; 4058 default: 4059 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4060 break; 4061 } 4062 } else { /* by default, new_local_type is seqdense */ 4063 new_local_type = MATSEQDENSE; 4064 bs = 1; 4065 } 4066 4067 /* create MATIS object if needed */ 4068 if (reuse == MAT_INITIAL_MATRIX) { 4069 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4070 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4071 } else { 4072 /* it also destroys the local matrices */ 4073 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4074 } 4075 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4076 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4077 4078 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4079 4080 /* Global to local map of received indices */ 4081 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4082 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4083 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4084 4085 /* restore attributes -> type of incoming data and its size */ 4086 buf_size_idxs = 0; 4087 for (i=0;i<n_recvs;i++) { 4088 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4089 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4090 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4091 } 4092 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4093 4094 /* set preallocation */ 4095 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4096 if (!newisdense) { 4097 PetscInt *new_local_nnz=0; 4098 4099 ptr_vals = recv_buffer_vals; 4100 ptr_idxs = recv_buffer_idxs_local; 4101 if (n_recvs) { 4102 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4103 } 4104 for (i=0;i<n_recvs;i++) { 4105 PetscInt j; 4106 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4107 for (j=0;j<*(ptr_idxs+1);j++) { 4108 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4109 } 4110 } else { 4111 /* TODO */ 4112 } 4113 ptr_idxs += olengths_idxs[i]; 4114 } 4115 if (new_local_nnz) { 4116 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4117 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4118 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4119 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4120 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4121 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4122 } else { 4123 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4124 } 4125 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4126 } else { 4127 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4128 } 4129 4130 /* set values */ 4131 ptr_vals = recv_buffer_vals; 4132 ptr_idxs = recv_buffer_idxs_local; 4133 for (i=0;i<n_recvs;i++) { 4134 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4135 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4136 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4137 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4138 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4139 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4140 } else { 4141 /* TODO */ 4142 } 4143 ptr_idxs += olengths_idxs[i]; 4144 ptr_vals += olengths_vals[i]; 4145 } 4146 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4147 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4148 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4150 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4151 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4152 4153 #if 0 4154 if (!restrict_comm) { /* check */ 4155 Vec lvec,rvec; 4156 PetscReal infty_error; 4157 4158 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4159 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4160 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4161 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4162 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4163 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4164 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4165 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4166 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4167 } 4168 #endif 4169 4170 /* assemble new additional is (if any) */ 4171 if (nis) { 4172 PetscInt **temp_idxs,*count_is,j,psum; 4173 4174 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4175 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4176 ptr_idxs = recv_buffer_idxs_is; 4177 psum = 0; 4178 for (i=0;i<n_recvs;i++) { 4179 for (j=0;j<nis;j++) { 4180 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4181 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4182 psum += plen; 4183 ptr_idxs += plen+1; /* shift pointer to received data */ 4184 } 4185 } 4186 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4187 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4188 for (i=1;i<nis;i++) { 4189 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4190 } 4191 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4192 ptr_idxs = recv_buffer_idxs_is; 4193 for (i=0;i<n_recvs;i++) { 4194 for (j=0;j<nis;j++) { 4195 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4196 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4197 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4198 ptr_idxs += plen+1; /* shift pointer to received data */ 4199 } 4200 } 4201 for (i=0;i<nis;i++) { 4202 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4203 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4204 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4205 } 4206 ierr = PetscFree(count_is);CHKERRQ(ierr); 4207 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4208 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4209 } 4210 /* free workspace */ 4211 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4212 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4213 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4214 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4215 if (isdense) { 4216 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4217 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4218 } else { 4219 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4220 } 4221 if (nis) { 4222 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4223 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4224 } 4225 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4226 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4227 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4228 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4229 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4230 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4231 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4232 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4233 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4234 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4235 ierr = PetscFree(onodes);CHKERRQ(ierr); 4236 if (nis) { 4237 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4238 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4239 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4240 } 4241 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4242 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4243 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4244 for (i=0;i<nis;i++) { 4245 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4246 } 4247 *mat_n = NULL; 4248 } 4249 PetscFunctionReturn(0); 4250 } 4251 4252 /* temporary hack into ksp private data structure */ 4253 #include <petsc/private/kspimpl.h> 4254 4255 #undef __FUNCT__ 4256 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4257 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4258 { 4259 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4260 PC_IS *pcis = (PC_IS*)pc->data; 4261 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4262 MatNullSpace CoarseNullSpace=NULL; 4263 ISLocalToGlobalMapping coarse_islg; 4264 IS coarse_is,*isarray; 4265 PetscInt i,im_active=-1,active_procs=-1; 4266 PetscInt nis,nisdofs,nisneu; 4267 PC pc_temp; 4268 PCType coarse_pc_type; 4269 KSPType coarse_ksp_type; 4270 PetscBool multilevel_requested,multilevel_allowed; 4271 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4272 Mat t_coarse_mat_is; 4273 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4274 PetscMPIInt all_procs; 4275 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4276 PetscBool compute_vecs = PETSC_FALSE; 4277 PetscScalar *array; 4278 PetscErrorCode ierr; 4279 4280 PetscFunctionBegin; 4281 /* Assign global numbering to coarse dofs */ 4282 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4283 PetscInt ocoarse_size; 4284 compute_vecs = PETSC_TRUE; 4285 ocoarse_size = pcbddc->coarse_size; 4286 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4287 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4288 /* see if we can avoid some work */ 4289 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4290 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4291 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4292 PC pc; 4293 PetscBool isbddc; 4294 4295 /* temporary workaround since PCBDDC does not have a reset method so far */ 4296 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4297 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4298 if (isbddc) { 4299 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4300 } 4301 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4302 coarse_reuse = PETSC_FALSE; 4303 } else { /* we can safely reuse already computed coarse matrix */ 4304 coarse_reuse = PETSC_TRUE; 4305 } 4306 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4307 coarse_reuse = PETSC_FALSE; 4308 } 4309 /* reset any subassembling information */ 4310 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4311 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4312 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4313 coarse_reuse = PETSC_TRUE; 4314 } 4315 4316 /* count "active" (i.e. with positive local size) and "void" processes */ 4317 im_active = !!(pcis->n); 4318 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4319 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4320 void_procs = all_procs-active_procs; 4321 csin_type_simple = PETSC_TRUE; 4322 redist = PETSC_FALSE; 4323 if (pcbddc->current_level && void_procs) { 4324 csin_ml = PETSC_TRUE; 4325 ncoarse_ml = void_procs; 4326 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4327 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4328 csin_ds = PETSC_TRUE; 4329 ncoarse_ds = pcbddc->redistribute_coarse; 4330 redist = PETSC_TRUE; 4331 } else { 4332 csin_ds = PETSC_TRUE; 4333 ncoarse_ds = active_procs; 4334 redist = PETSC_TRUE; 4335 } 4336 } else { 4337 csin_ml = PETSC_FALSE; 4338 ncoarse_ml = all_procs; 4339 if (void_procs) { 4340 csin_ds = PETSC_TRUE; 4341 ncoarse_ds = void_procs; 4342 csin_type_simple = PETSC_FALSE; 4343 } else { 4344 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4345 csin_ds = PETSC_TRUE; 4346 ncoarse_ds = pcbddc->redistribute_coarse; 4347 redist = PETSC_TRUE; 4348 } else { 4349 csin_ds = PETSC_FALSE; 4350 ncoarse_ds = all_procs; 4351 } 4352 } 4353 } 4354 4355 /* 4356 test if we can go multilevel: three conditions must be satisfied: 4357 - we have not exceeded the number of levels requested 4358 - we can actually subassemble the active processes 4359 - we can find a suitable number of MPI processes where we can place the subassembled problem 4360 */ 4361 multilevel_allowed = PETSC_FALSE; 4362 multilevel_requested = PETSC_FALSE; 4363 if (pcbddc->current_level < pcbddc->max_levels) { 4364 multilevel_requested = PETSC_TRUE; 4365 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4366 multilevel_allowed = PETSC_FALSE; 4367 } else { 4368 multilevel_allowed = PETSC_TRUE; 4369 } 4370 } 4371 /* determine number of process partecipating to coarse solver */ 4372 if (multilevel_allowed) { 4373 ncoarse = ncoarse_ml; 4374 csin = csin_ml; 4375 redist = PETSC_FALSE; 4376 } else { 4377 ncoarse = ncoarse_ds; 4378 csin = csin_ds; 4379 } 4380 4381 /* creates temporary l2gmap and IS for coarse indexes */ 4382 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4383 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4384 4385 /* creates temporary MATIS object for coarse matrix */ 4386 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4387 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4388 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4389 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4390 #if 0 4391 { 4392 PetscViewer viewer; 4393 char filename[256]; 4394 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4395 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4396 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4397 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4398 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4399 } 4400 #endif 4401 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4402 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4403 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4404 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4405 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4406 4407 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4408 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4409 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4410 const PetscInt *idxs; 4411 ISLocalToGlobalMapping tmap; 4412 4413 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4414 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4415 /* allocate space for temporary storage */ 4416 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4417 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4418 /* allocate for IS array */ 4419 nisdofs = pcbddc->n_ISForDofsLocal; 4420 nisneu = !!pcbddc->NeumannBoundariesLocal; 4421 nis = nisdofs + nisneu; 4422 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4423 /* dofs splitting */ 4424 for (i=0;i<nisdofs;i++) { 4425 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4426 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4427 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4428 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4429 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4430 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4431 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4432 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4433 } 4434 /* neumann boundaries */ 4435 if (pcbddc->NeumannBoundariesLocal) { 4436 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4437 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4438 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4439 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4440 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4441 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4442 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4443 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4444 } 4445 /* free memory */ 4446 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4447 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4448 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4449 } else { 4450 nis = 0; 4451 nisdofs = 0; 4452 nisneu = 0; 4453 isarray = NULL; 4454 } 4455 /* destroy no longer needed map */ 4456 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4457 4458 /* restrict on coarse candidates (if needed) */ 4459 coarse_mat_is = NULL; 4460 if (csin) { 4461 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4462 if (redist) { 4463 PetscMPIInt rank; 4464 PetscInt spc,n_spc_p1,dest[1],destsize; 4465 4466 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4467 spc = active_procs/ncoarse; 4468 n_spc_p1 = active_procs%ncoarse; 4469 if (im_active) { 4470 destsize = 1; 4471 if (rank > n_spc_p1*(spc+1)-1) { 4472 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4473 } else { 4474 dest[0] = rank/(spc+1); 4475 } 4476 } else { 4477 destsize = 0; 4478 } 4479 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4480 } else if (csin_type_simple) { 4481 PetscMPIInt rank; 4482 PetscInt issize,isidx; 4483 4484 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4485 if (im_active) { 4486 issize = 1; 4487 isidx = (PetscInt)rank; 4488 } else { 4489 issize = 0; 4490 isidx = -1; 4491 } 4492 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4493 } else { /* get a suitable subassembling pattern from MATIS code */ 4494 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4495 } 4496 4497 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4498 if (!redist || ncoarse <= void_procs) { 4499 PetscInt ncoarse_cand,tissize,*nisindices; 4500 PetscInt *coarse_candidates; 4501 const PetscInt* tisindices; 4502 4503 /* get coarse candidates' ranks in pc communicator */ 4504 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4505 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4506 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4507 if (!coarse_candidates[i]) { 4508 coarse_candidates[ncoarse_cand++]=i; 4509 } 4510 } 4511 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4512 4513 4514 if (pcbddc->dbg_flag) { 4515 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4516 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4517 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4519 for (i=0;i<ncoarse_cand;i++) { 4520 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4521 } 4522 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4523 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4524 } 4525 /* shift the pattern on coarse candidates */ 4526 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4527 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4528 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4529 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4530 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4531 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4532 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4533 } 4534 if (pcbddc->dbg_flag) { 4535 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4536 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4537 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4538 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4539 } 4540 } 4541 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4542 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4543 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4544 } else { /* this is the last level, so use just receiving processes in subcomm */ 4545 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4546 } 4547 } else { 4548 if (pcbddc->dbg_flag) { 4549 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4550 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4551 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4552 } 4553 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4554 coarse_mat_is = t_coarse_mat_is; 4555 } 4556 4557 /* create local to global scatters for coarse problem */ 4558 if (compute_vecs) { 4559 PetscInt lrows; 4560 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4561 if (coarse_mat_is) { 4562 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4563 } else { 4564 lrows = 0; 4565 } 4566 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4567 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4568 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4569 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4570 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4571 } 4572 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4573 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4574 4575 /* set defaults for coarse KSP and PC */ 4576 if (multilevel_allowed) { 4577 coarse_ksp_type = KSPRICHARDSON; 4578 coarse_pc_type = PCBDDC; 4579 } else { 4580 coarse_ksp_type = KSPPREONLY; 4581 coarse_pc_type = PCREDUNDANT; 4582 } 4583 4584 /* print some info if requested */ 4585 if (pcbddc->dbg_flag) { 4586 if (!multilevel_allowed) { 4587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4588 if (multilevel_requested) { 4589 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4590 } else if (pcbddc->max_levels) { 4591 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4592 } 4593 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4594 } 4595 } 4596 4597 /* create the coarse KSP object only once with defaults */ 4598 if (coarse_mat_is) { 4599 MatReuse coarse_mat_reuse; 4600 PetscViewer dbg_viewer = NULL; 4601 if (pcbddc->dbg_flag) { 4602 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4603 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4604 } 4605 if (!pcbddc->coarse_ksp) { 4606 char prefix[256],str_level[16]; 4607 size_t len; 4608 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4609 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4610 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4611 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4612 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4613 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4614 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4615 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4616 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4617 /* prefix */ 4618 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4619 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4620 if (!pcbddc->current_level) { 4621 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4622 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4623 } else { 4624 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4625 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4626 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4627 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4628 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4629 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4630 } 4631 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4632 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4633 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4634 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4635 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4636 /* allow user customization */ 4637 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4638 } 4639 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4640 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4641 if (nisdofs) { 4642 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4643 for (i=0;i<nisdofs;i++) { 4644 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4645 } 4646 } 4647 if (nisneu) { 4648 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4649 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4650 } 4651 4652 /* get some info after set from options */ 4653 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4654 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4655 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4656 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4657 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4658 isbddc = PETSC_FALSE; 4659 } 4660 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4661 if (isredundant) { 4662 KSP inner_ksp; 4663 PC inner_pc; 4664 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4665 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4666 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4667 } 4668 4669 /* assemble coarse matrix */ 4670 if (coarse_reuse) { 4671 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4672 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4673 coarse_mat_reuse = MAT_REUSE_MATRIX; 4674 } else { 4675 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4676 } 4677 if (isbddc || isnn) { 4678 if (pcbddc->coarsening_ratio > 1) { 4679 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4680 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4681 if (pcbddc->dbg_flag) { 4682 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4683 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4684 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4685 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4686 } 4687 } 4688 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4689 } else { 4690 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4691 coarse_mat = coarse_mat_is; 4692 } 4693 } else { 4694 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4695 } 4696 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4697 4698 /* propagate symmetry info of coarse matrix */ 4699 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4700 if (pc->pmat->symmetric_set) { 4701 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4702 } 4703 if (pc->pmat->hermitian_set) { 4704 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4705 } 4706 if (pc->pmat->spd_set) { 4707 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4708 } 4709 /* set operators */ 4710 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4711 if (pcbddc->dbg_flag) { 4712 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4713 } 4714 } else { /* processes non partecipating to coarse solver (if any) */ 4715 coarse_mat = 0; 4716 } 4717 ierr = PetscFree(isarray);CHKERRQ(ierr); 4718 #if 0 4719 { 4720 PetscViewer viewer; 4721 char filename[256]; 4722 sprintf(filename,"coarse_mat.m"); 4723 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4724 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4725 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4726 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4727 } 4728 #endif 4729 4730 /* Compute coarse null space (special handling by BDDC only) */ 4731 #if 0 4732 if (pcbddc->NullSpace) { 4733 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4734 } 4735 #endif 4736 4737 if (pcbddc->coarse_ksp) { 4738 Vec crhs,csol; 4739 PetscBool ispreonly; 4740 4741 if (CoarseNullSpace) { 4742 if (isbddc) { 4743 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4744 } else { 4745 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4746 } 4747 } 4748 /* setup coarse ksp */ 4749 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4750 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4751 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4752 /* hack */ 4753 if (!csol) { 4754 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4755 } 4756 if (!crhs) { 4757 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4758 } 4759 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4760 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4761 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4762 KSP check_ksp; 4763 KSPType check_ksp_type; 4764 PC check_pc; 4765 Vec check_vec,coarse_vec; 4766 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4767 PetscInt its; 4768 PetscBool compute_eigs; 4769 PetscReal *eigs_r,*eigs_c; 4770 PetscInt neigs; 4771 const char *prefix; 4772 4773 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4774 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4775 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4776 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4777 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4778 if (ispreonly) { 4779 check_ksp_type = KSPPREONLY; 4780 compute_eigs = PETSC_FALSE; 4781 } else { 4782 check_ksp_type = KSPGMRES; 4783 compute_eigs = PETSC_TRUE; 4784 } 4785 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4786 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4787 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4788 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4789 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4790 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4791 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4792 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4793 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4794 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4795 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4796 /* create random vec */ 4797 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4798 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4799 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4800 if (CoarseNullSpace) { 4801 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4802 } 4803 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4804 /* solve coarse problem */ 4805 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4806 if (CoarseNullSpace) { 4807 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4808 } 4809 /* set eigenvalue estimation if preonly has not been requested */ 4810 if (compute_eigs) { 4811 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4812 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4813 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4814 lambda_max = eigs_r[neigs-1]; 4815 lambda_min = eigs_r[0]; 4816 if (pcbddc->use_coarse_estimates) { 4817 if (lambda_max>lambda_min) { 4818 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4819 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4820 } 4821 } 4822 } 4823 4824 /* check coarse problem residual error */ 4825 if (pcbddc->dbg_flag) { 4826 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4827 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4828 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4829 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4830 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4831 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4832 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4833 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4834 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4835 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4836 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4837 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4838 if (compute_eigs) { 4839 PetscReal lambda_max_s,lambda_min_s; 4840 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4841 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4842 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4843 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4844 for (i=0;i<neigs;i++) { 4845 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4846 } 4847 } 4848 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4849 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4850 } 4851 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4852 if (compute_eigs) { 4853 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4854 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4855 } 4856 } 4857 } 4858 /* print additional info */ 4859 if (pcbddc->dbg_flag) { 4860 /* waits until all processes reaches this point */ 4861 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4862 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4863 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4864 } 4865 4866 /* free memory */ 4867 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4868 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4869 PetscFunctionReturn(0); 4870 } 4871 4872 #undef __FUNCT__ 4873 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4874 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4875 { 4876 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4877 PC_IS* pcis = (PC_IS*)pc->data; 4878 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4879 IS subset,subset_mult,subset_n; 4880 PetscInt local_size,coarse_size=0; 4881 PetscInt *local_primal_indices=NULL; 4882 const PetscInt *t_local_primal_indices; 4883 PetscErrorCode ierr; 4884 4885 PetscFunctionBegin; 4886 /* Compute global number of coarse dofs */ 4887 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4888 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4889 } 4890 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4891 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4892 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4893 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4894 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4895 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4896 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4897 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4898 if (local_size != pcbddc->local_primal_size) { 4899 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4900 } 4901 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4902 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4903 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4904 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4905 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4906 4907 /* check numbering */ 4908 if (pcbddc->dbg_flag) { 4909 PetscScalar coarsesum,*array,*array2; 4910 PetscInt i; 4911 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4912 4913 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4914 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4915 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4916 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4917 /* counter */ 4918 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4919 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4920 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4921 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4922 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4923 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4924 4925 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4926 for (i=0;i<pcbddc->local_primal_size;i++) { 4927 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4928 } 4929 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4930 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4931 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4932 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4933 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4934 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4935 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4936 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4937 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4938 for (i=0;i<pcis->n;i++) { 4939 if (array[i] != 0.0 && array[i] != array2[i]) { 4940 PetscInt owned = (PetscInt)(array[i]); 4941 PetscInt neigh = (PetscInt)(array2[i]); 4942 set_error = PETSC_TRUE; 4943 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4944 } 4945 } 4946 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4947 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4948 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4949 for (i=0;i<pcis->n;i++) { 4950 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4951 } 4952 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4953 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4954 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4955 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4956 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4957 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4958 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4959 PetscInt *gidxs; 4960 4961 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4962 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4963 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4964 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4965 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4966 for (i=0;i<pcbddc->local_primal_size;i++) { 4967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4968 } 4969 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4970 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4971 } 4972 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4973 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4974 } 4975 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4976 /* get back data */ 4977 *coarse_size_n = coarse_size; 4978 *local_primal_indices_n = local_primal_indices; 4979 PetscFunctionReturn(0); 4980 } 4981 4982 #undef __FUNCT__ 4983 #define __FUNCT__ "PCBDDCGlobalToLocal" 4984 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4985 { 4986 IS localis_t; 4987 PetscInt i,lsize,*idxs,n; 4988 PetscScalar *vals; 4989 PetscErrorCode ierr; 4990 4991 PetscFunctionBegin; 4992 /* get indices in local ordering exploiting local to global map */ 4993 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4994 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4995 for (i=0;i<lsize;i++) vals[i] = 1.0; 4996 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4997 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4998 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4999 if (idxs) { /* multilevel guard */ 5000 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5001 } 5002 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5003 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5004 ierr = PetscFree(vals);CHKERRQ(ierr); 5005 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5006 /* now compute set in local ordering */ 5007 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5008 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5009 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5010 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5011 for (i=0,lsize=0;i<n;i++) { 5012 if (PetscRealPart(vals[i]) > 0.5) { 5013 lsize++; 5014 } 5015 } 5016 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5017 for (i=0,lsize=0;i<n;i++) { 5018 if (PetscRealPart(vals[i]) > 0.5) { 5019 idxs[lsize++] = i; 5020 } 5021 } 5022 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5023 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5024 *localis = localis_t; 5025 PetscFunctionReturn(0); 5026 } 5027 5028 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5029 #undef __FUNCT__ 5030 #define __FUNCT__ "PCBDDCMatMult_Private" 5031 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5032 { 5033 PCBDDCChange_ctx change_ctx; 5034 PetscErrorCode ierr; 5035 5036 PetscFunctionBegin; 5037 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5038 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5039 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5040 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5041 PetscFunctionReturn(0); 5042 } 5043 5044 #undef __FUNCT__ 5045 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5046 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5047 { 5048 PCBDDCChange_ctx change_ctx; 5049 PetscErrorCode ierr; 5050 5051 PetscFunctionBegin; 5052 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5053 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5054 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5055 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5056 PetscFunctionReturn(0); 5057 } 5058 5059 #undef __FUNCT__ 5060 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5061 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5062 { 5063 PC_IS *pcis=(PC_IS*)pc->data; 5064 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5065 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5066 Mat S_j; 5067 PetscInt *used_xadj,*used_adjncy; 5068 PetscBool free_used_adj; 5069 PetscErrorCode ierr; 5070 5071 PetscFunctionBegin; 5072 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5073 free_used_adj = PETSC_FALSE; 5074 if (pcbddc->sub_schurs_layers == -1) { 5075 used_xadj = NULL; 5076 used_adjncy = NULL; 5077 } else { 5078 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5079 used_xadj = pcbddc->mat_graph->xadj; 5080 used_adjncy = pcbddc->mat_graph->adjncy; 5081 } else if (pcbddc->computed_rowadj) { 5082 used_xadj = pcbddc->mat_graph->xadj; 5083 used_adjncy = pcbddc->mat_graph->adjncy; 5084 } else { 5085 PetscBool flg_row=PETSC_FALSE; 5086 const PetscInt *xadj,*adjncy; 5087 PetscInt nvtxs; 5088 5089 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5090 if (flg_row) { 5091 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5092 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5093 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5094 free_used_adj = PETSC_TRUE; 5095 } else { 5096 pcbddc->sub_schurs_layers = -1; 5097 used_xadj = NULL; 5098 used_adjncy = NULL; 5099 } 5100 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5101 } 5102 } 5103 5104 /* setup sub_schurs data */ 5105 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5106 if (!sub_schurs->use_mumps) { 5107 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5108 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5109 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5110 } else { 5111 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5112 PetscBool isseqaij; 5113 if (!pcbddc->use_vertices && reuse_solvers) { 5114 PetscInt n_vertices; 5115 5116 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5117 reuse_solvers = (PetscBool)!n_vertices; 5118 } 5119 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5120 if (!isseqaij) { 5121 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5122 if (matis->A == pcbddc->local_mat) { 5123 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5124 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5125 } else { 5126 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5127 } 5128 } 5129 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5130 } 5131 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5132 5133 /* free adjacency */ 5134 if (free_used_adj) { 5135 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5136 } 5137 PetscFunctionReturn(0); 5138 } 5139 5140 #undef __FUNCT__ 5141 #define __FUNCT__ "PCBDDCInitSubSchurs" 5142 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5143 { 5144 PC_IS *pcis=(PC_IS*)pc->data; 5145 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5146 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5147 PCBDDCGraph graph; 5148 PetscErrorCode ierr; 5149 5150 PetscFunctionBegin; 5151 /* attach interface graph for determining subsets */ 5152 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5153 IS verticesIS,verticescomm; 5154 PetscInt vsize,*idxs; 5155 5156 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5157 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5158 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5159 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5160 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5161 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5162 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5163 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5164 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5165 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5166 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5167 /* 5168 if (pcbddc->dbg_flag) { 5169 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5170 } 5171 */ 5172 } else { 5173 graph = pcbddc->mat_graph; 5174 } 5175 5176 /* sub_schurs init */ 5177 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5178 5179 /* free graph struct */ 5180 if (pcbddc->sub_schurs_rebuild) { 5181 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5182 } 5183 PetscFunctionReturn(0); 5184 } 5185