1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0" 10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PetscErrorCode ierr; 14 15 PetscFunctionBegin; 16 if (!pcbddc->benign_sf) { 17 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 18 if (pcbddc->benign_p0_gidx >= 0) { 19 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 20 } else { 21 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 22 } 23 } 24 if (pop) { /* use SF to get values */ 25 PetscScalar *array; 26 27 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 28 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 29 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 30 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 31 } else { /* use VecSetValue */ 32 if (pcbddc->benign_p0_gidx >= 0) { 33 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 34 } 35 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 36 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 43 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 44 { 45 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 46 PetscErrorCode ierr; 47 48 PetscFunctionBegin; 49 /* TODO: add error checking 50 - avoid nested pop (or push) calls. 51 - cannot push before pop. 52 - cannot call this if pcbddc->local_mat is NULL 53 */ 54 if (pcbddc->benign_p0_lidx < 0) { 55 PetscFunctionReturn(0); 56 } 57 if (pop) { 58 const PetscInt *cB0_cols; 59 PetscInt cB0_ncol; 60 const PetscScalar *cB0_vals; 61 62 /* extract B_0 */ 63 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 64 pcbddc->B0_ncol = cB0_ncol; 65 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 66 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 67 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 68 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 70 /* remove rows and cols from local problem */ 71 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 72 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 73 } else { /* push */ 74 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 75 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 76 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 77 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 79 } 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "PCBDDCAdaptiveSelection" 85 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 86 { 87 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 88 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 89 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 90 PetscBLASInt *B_iwork,*B_ifail; 91 PetscScalar *work,lwork; 92 PetscScalar *St,*S,*eigv; 93 PetscScalar *Sarray,*Starray; 94 PetscReal *eigs,thresh; 95 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 96 PetscBool allocated_S_St; 97 #if defined(PETSC_USE_COMPLEX) 98 PetscReal *rwork; 99 #endif 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 if (!sub_schurs->use_mumps) { 104 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 105 } 106 107 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 108 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 109 } 110 111 if (pcbddc->dbg_flag) { 112 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 114 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 115 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 116 } 117 118 if (pcbddc->dbg_flag) { 119 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 120 } 121 122 /* max size of subsets */ 123 mss = 0; 124 for (i=0;i<sub_schurs->n_subs;i++) { 125 PetscInt subset_size; 126 127 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 128 mss = PetscMax(mss,subset_size); 129 } 130 131 /* min/max and threshold */ 132 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 133 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 134 nmax = PetscMax(nmin,nmax); 135 allocated_S_St = PETSC_FALSE; 136 if (nmin) { 137 allocated_S_St = PETSC_TRUE; 138 } 139 140 /* allocate lapack workspace */ 141 cum = cum2 = 0; 142 maxneigs = 0; 143 for (i=0;i<sub_schurs->n_subs;i++) { 144 PetscInt n,subset_size; 145 146 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 147 n = PetscMin(subset_size,nmax); 148 cum += subset_size; 149 cum2 += subset_size*n; 150 maxneigs = PetscMax(maxneigs,n); 151 } 152 if (mss) { 153 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 154 PetscBLASInt B_itype = 1; 155 PetscBLASInt B_N = mss; 156 PetscReal zero = 0.0; 157 PetscReal eps = 0.0; /* dlamch? */ 158 159 B_lwork = -1; 160 S = NULL; 161 St = NULL; 162 eigs = NULL; 163 eigv = NULL; 164 B_iwork = NULL; 165 B_ifail = NULL; 166 #if defined(PETSC_USE_COMPLEX) 167 rwork = NULL; 168 #endif 169 thresh = 1.0; 170 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 171 #if defined(PETSC_USE_COMPLEX) 172 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 173 #else 174 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 175 #endif 176 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 177 ierr = PetscFPTrapPop();CHKERRQ(ierr); 178 } else { 179 /* TODO */ 180 } 181 } else { 182 lwork = 0; 183 } 184 185 nv = 0; 186 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 187 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 188 } 189 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 190 if (allocated_S_St) { 191 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 192 } 193 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 194 #if defined(PETSC_USE_COMPLEX) 195 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 196 #endif 197 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 198 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 199 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 200 nv+cum,&pcbddc->adaptive_constraints_idxs, 201 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 202 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 203 204 maxneigs = 0; 205 cum = cum2 = cumarray = 0; 206 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 207 pcbddc->adaptive_constraints_data_ptr[0] = 0; 208 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 209 const PetscInt *idxs; 210 211 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 212 for (cum=0;cum<nv;cum++) { 213 pcbddc->adaptive_constraints_n[cum] = 1; 214 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 215 pcbddc->adaptive_constraints_data[cum] = 1.0; 216 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 217 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 218 } 219 cum2 = cum; 220 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 221 } 222 223 if (mss) { /* multilevel */ 224 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 225 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 226 } 227 228 for (i=0;i<sub_schurs->n_subs;i++) { 229 230 const PetscInt *idxs; 231 PetscReal infty = PETSC_MAX_REAL; 232 PetscInt j,subset_size,eigs_start = 0; 233 PetscBLASInt B_N; 234 PetscBool same_data = PETSC_FALSE; 235 236 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 237 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 238 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 239 if (sub_schurs->is_hermitian) { 240 PetscInt j,k; 241 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 242 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 243 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 244 } 245 for (j=0;j<subset_size;j++) { 246 for (k=j;k<subset_size;k++) { 247 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 248 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 249 } 250 } 251 } else { 252 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 } else { 256 S = Sarray + cumarray; 257 St = Starray + cumarray; 258 } 259 260 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 261 /* see if we can save some work */ 262 if (sub_schurs->n_subs == 1) { 263 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 264 } 265 266 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 267 B_neigs = 0; 268 } else { 269 /* Threshold: this is an heuristic for edges */ 270 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 271 272 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 273 PetscBLASInt B_itype = 1; 274 PetscBLASInt B_IL, B_IU; 275 PetscReal eps = -1.0; /* dlamch? */ 276 PetscInt nmin_s; 277 278 if (pcbddc->dbg_flag) { 279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 280 } 281 282 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 283 if (thresh > 1.+PETSC_SMALL) { 284 285 /* ask for eigenvalues larger than thresh */ 286 #if defined(PETSC_USE_COMPLEX) 287 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 288 #else 289 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 290 #endif 291 } else { 292 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 293 B_IL = 1; 294 #if defined(PETSC_USE_COMPLEX) 295 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 296 #else 297 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 298 #endif 299 } 300 ierr = PetscFPTrapPop();CHKERRQ(ierr); 301 if (B_ierr) { 302 if (B_ierr < 0 ) { 303 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 304 } else if (B_ierr <= B_N) { 305 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 306 } else { 307 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 308 } 309 } 310 311 if (B_neigs > nmax) { 312 if (pcbddc->dbg_flag) { 313 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 314 } 315 eigs_start = B_neigs -nmax; 316 B_neigs = nmax; 317 } 318 319 nmin_s = PetscMin(nmin,B_N); 320 if (B_neigs < nmin_s) { 321 PetscBLASInt B_neigs2; 322 323 B_IU = B_N - B_neigs; 324 B_IL = B_N - nmin_s + 1; 325 if (pcbddc->dbg_flag) { 326 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 327 } 328 if (sub_schurs->is_hermitian) { 329 PetscInt j; 330 for (j=0;j<subset_size;j++) { 331 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 332 } 333 for (j=0;j<subset_size;j++) { 334 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 } else { 337 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 338 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 339 } 340 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 341 #if defined(PETSC_USE_COMPLEX) 342 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 343 #else 344 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 345 #endif 346 ierr = PetscFPTrapPop();CHKERRQ(ierr); 347 B_neigs += B_neigs2; 348 } 349 if (B_ierr) { 350 if (B_ierr < 0 ) { 351 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 352 } else if (B_ierr <= B_N) { 353 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 354 } else { 355 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 356 } 357 } 358 if (pcbddc->dbg_flag) { 359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 360 for (j=0;j<B_neigs;j++) { 361 if (eigs[j] == 0.0) { 362 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 363 } else { 364 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 365 } 366 } 367 } 368 } else { 369 /* TODO */ 370 } 371 } 372 maxneigs = PetscMax(B_neigs,maxneigs); 373 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 374 if (B_neigs) { 375 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 376 377 if (pcbddc->dbg_flag > 1) { 378 PetscInt ii; 379 for (ii=0;ii<B_neigs;ii++) { 380 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 381 for (j=0;j<B_N;j++) { 382 #if defined(PETSC_USE_COMPLEX) 383 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 384 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 385 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 386 #else 387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 388 #endif 389 } 390 } 391 } 392 #if 0 393 for (j=0;j<B_neigs;j++) { 394 PetscBLASInt Blas_N,Blas_one = 1.0; 395 PetscScalar norm; 396 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 397 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 398 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 399 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 400 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 401 } else { 402 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 403 } 404 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 405 } 406 #endif 407 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 408 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 409 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 410 cum++; 411 } 412 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 413 /* shift for next computation */ 414 cumarray += subset_size*subset_size; 415 } 416 if (pcbddc->dbg_flag) { 417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 418 } 419 420 if (mss) { 421 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 422 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 423 /* destroy matrices (junk) */ 424 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 425 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 426 } 427 if (allocated_S_St) { 428 ierr = PetscFree2(S,St);CHKERRQ(ierr); 429 } 430 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 431 #if defined(PETSC_USE_COMPLEX) 432 ierr = PetscFree(rwork);CHKERRQ(ierr); 433 #endif 434 if (pcbddc->dbg_flag) { 435 PetscInt maxneigs_r; 436 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 437 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 438 } 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "PCBDDCSetUpSolvers" 444 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 445 { 446 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 447 PetscScalar *coarse_submat_vals; 448 PetscErrorCode ierr; 449 450 PetscFunctionBegin; 451 /* Setup local scatters R_to_B and (optionally) R_to_D */ 452 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 453 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 454 455 /* Setup local neumann solver ksp_R */ 456 /* PCBDDCSetUpLocalScatters should be called first! */ 457 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 458 459 /* Change global null space passed in by the user if change of basis has been requested */ 460 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 461 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 462 } 463 464 /* 465 Setup local correction and local part of coarse basis. 466 Gives back the dense local part of the coarse matrix in column major ordering 467 */ 468 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 469 470 /* Compute total number of coarse nodes and setup coarse solver */ 471 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 472 473 /* free */ 474 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "PCBDDCResetCustomization" 480 PetscErrorCode PCBDDCResetCustomization(PC pc) 481 { 482 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 483 PetscErrorCode ierr; 484 485 PetscFunctionBegin; 486 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 487 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 488 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 489 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 490 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 491 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 492 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 493 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 494 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 495 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 496 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "PCBDDCResetTopography" 502 PetscErrorCode PCBDDCResetTopography(PC pc) 503 { 504 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 509 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 510 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 511 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 512 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 513 PetscFunctionReturn(0); 514 } 515 516 #undef __FUNCT__ 517 #define __FUNCT__ "PCBDDCResetSolvers" 518 PetscErrorCode PCBDDCResetSolvers(PC pc) 519 { 520 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 521 PetscScalar *array; 522 PetscErrorCode ierr; 523 524 PetscFunctionBegin; 525 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 526 if (pcbddc->coarse_phi_B) { 527 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 528 ierr = PetscFree(array);CHKERRQ(ierr); 529 } 530 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 531 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 532 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 533 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 534 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 535 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 536 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 537 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 538 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 539 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 540 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 541 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 542 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 543 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 544 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 545 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 546 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 547 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 548 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 549 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 550 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 551 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 552 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 553 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 554 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 555 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 556 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 557 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 563 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 564 { 565 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 566 PC_IS *pcis = (PC_IS*)pc->data; 567 VecType impVecType; 568 PetscInt n_constraints,n_R,old_size,n_benign; 569 PetscErrorCode ierr; 570 571 PetscFunctionBegin; 572 if (!pcbddc->ConstraintMatrix) { 573 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 574 } 575 /* get sizes */ 576 n_benign = 0; 577 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 578 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 579 n_R = pcis->n - pcbddc->n_vertices; 580 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 581 /* local work vectors (try to avoid unneeded work)*/ 582 /* R nodes */ 583 old_size = -1; 584 if (pcbddc->vec1_R) { 585 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 586 } 587 if (n_R != old_size) { 588 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 589 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 590 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 591 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 592 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 593 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 594 } 595 /* local primal dofs */ 596 old_size = -1; 597 if (pcbddc->vec1_P) { 598 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 599 } 600 if (pcbddc->local_primal_size != old_size) { 601 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 602 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 603 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 604 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 605 } 606 /* local explicit constraints */ 607 old_size = -1; 608 if (pcbddc->vec1_C) { 609 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 610 } 611 if (n_constraints && n_constraints != old_size) { 612 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 613 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 614 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 615 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 616 } 617 PetscFunctionReturn(0); 618 } 619 620 #undef __FUNCT__ 621 #define __FUNCT__ "PCBDDCSetUpCorrection" 622 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 623 { 624 PetscErrorCode ierr; 625 /* pointers to pcis and pcbddc */ 626 PC_IS* pcis = (PC_IS*)pc->data; 627 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 628 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 629 /* submatrices of local problem */ 630 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 631 /* submatrices of benign trick */ 632 Mat B0_V = NULL; 633 /* submatrices of local coarse problem */ 634 Mat S_VV,S_CV,S_VC,S_CC; 635 /* working matrices */ 636 Mat C_CR; 637 /* additional working stuff */ 638 PC pc_R; 639 Mat F,B0 = NULL; 640 PetscBool isLU,isCHOL,isILU; 641 642 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 643 PetscScalar *work; 644 PetscInt *idx_V_B; 645 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 646 PetscInt i,n_R,n_D,n_B; 647 PetscBool unsymmetric_check; 648 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 649 MatType impMatType; 650 /* some shortcuts to scalars */ 651 PetscScalar one=1.0,m_one=-1.0; 652 653 PetscFunctionBegin; 654 n_benign = 0; 655 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 656 n_vertices = pcbddc->n_vertices; 657 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 658 /* Set Non-overlapping dimensions */ 659 n_B = pcis->n_B; 660 n_D = pcis->n - n_B; 661 n_R = pcis->n - n_vertices; 662 663 /* Set types for local objects needed by BDDC precondtioner */ 664 impMatType = MATSEQDENSE; 665 666 /* vertices in boundary numbering */ 667 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 668 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 669 if (i != n_vertices) { 670 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 671 } 672 673 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 674 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 675 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 676 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 677 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 678 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 679 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 680 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 681 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 682 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 683 684 unsymmetric_check = PETSC_FALSE; 685 /* allocate workspace */ 686 n = 0; 687 if (n_constraints) { 688 n += n_R*n_constraints; 689 } 690 if (n_vertices) { 691 n = PetscMax(2*n_R*n_vertices,n); 692 n = PetscMax((n_R+n_B)*n_vertices,n); 693 } 694 if (!pcbddc->symmetric_primal) { 695 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 696 unsymmetric_check = PETSC_TRUE; 697 } 698 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 699 700 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 701 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 702 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 703 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 704 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 705 if (isLU || isILU || isCHOL) { 706 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 707 } else if (sub_schurs->reuse_mumps) { 708 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 709 MatFactorType type; 710 711 F = reuse_mumps->F; 712 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 713 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 714 } else { 715 F = NULL; 716 } 717 718 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 719 if (n_constraints) { 720 Mat M1,M2,M3; 721 Mat auxmat; 722 IS is_aux; 723 PetscScalar *array,*array2; 724 725 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 727 728 /* Extract constraints on R nodes: C_{CR} */ 729 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 730 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 731 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 732 733 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 734 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 735 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 736 for (i=0;i<n_constraints;i++) { 737 const PetscScalar *row_cmat_values; 738 const PetscInt *row_cmat_indices; 739 PetscInt size_of_constraint,j; 740 741 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 742 for (j=0;j<size_of_constraint;j++) { 743 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 744 } 745 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 746 } 747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 748 if (F) { 749 Mat B; 750 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 752 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 753 ierr = MatDestroy(&B);CHKERRQ(ierr); 754 } else { 755 PetscScalar *marr; 756 757 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 758 for (i=0;i<n_constraints;i++) { 759 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 760 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 761 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 762 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 763 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 764 } 765 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 766 } 767 if (!pcbddc->switch_static) { 768 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 769 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 770 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 771 for (i=0;i<n_constraints;i++) { 772 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 773 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 774 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 776 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 777 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 778 } 779 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 780 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 781 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 782 } else { 783 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 784 pcbddc->local_auxmat2 = local_auxmat2_R; 785 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 786 } 787 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 788 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 789 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 790 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 791 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 792 if (isCHOL) { 793 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 794 } else { 795 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 796 } 797 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 798 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 799 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 800 ierr = MatDestroy(&M2);CHKERRQ(ierr); 801 ierr = MatDestroy(&M3);CHKERRQ(ierr); 802 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 803 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 804 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 805 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 806 ierr = MatDestroy(&M1);CHKERRQ(ierr); 807 } 808 /* Get submatrices from subdomain matrix */ 809 if (n_benign) { 810 IS dummy; 811 Mat B0_R; 812 PetscReal norm; 813 PetscInt ii[2]; 814 815 ii[0] = 0; 816 ii[1] = pcbddc->B0_ncol; 817 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 818 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 819 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 820 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 821 if (norm > PETSC_SMALL) { 822 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 823 } 824 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 825 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 826 } 827 828 if (n_vertices) { 829 IS is_aux; 830 831 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 832 IS tis; 833 834 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 835 ierr = ISSort(tis);CHKERRQ(ierr); 836 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 837 ierr = ISDestroy(&tis);CHKERRQ(ierr); 838 } else { 839 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 840 } 841 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 842 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 843 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 844 if (n_benign) { 845 IS dummy; 846 847 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 848 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 849 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 850 } 851 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 852 } 853 854 /* Matrix of coarse basis functions (local) */ 855 if (pcbddc->coarse_phi_B) { 856 PetscInt on_B,on_primal,on_D=n_D; 857 if (pcbddc->coarse_phi_D) { 858 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 859 } 860 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 861 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 862 PetscScalar *marray; 863 864 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 865 ierr = PetscFree(marray);CHKERRQ(ierr); 866 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 867 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 868 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 869 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 870 } 871 } 872 873 if (!pcbddc->coarse_phi_B) { 874 PetscScalar *marray; 875 876 n = n_B*pcbddc->local_primal_size; 877 if (pcbddc->switch_static || pcbddc->dbg_flag) { 878 n += n_D*pcbddc->local_primal_size; 879 } 880 if (!pcbddc->symmetric_primal) { 881 n *= 2; 882 } 883 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 884 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 885 n = n_B*pcbddc->local_primal_size; 886 if (pcbddc->switch_static || pcbddc->dbg_flag) { 887 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 888 n += n_D*pcbddc->local_primal_size; 889 } 890 if (!pcbddc->symmetric_primal) { 891 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 892 if (pcbddc->switch_static || pcbddc->dbg_flag) { 893 n = n_B*pcbddc->local_primal_size; 894 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 895 } 896 } else { 897 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 898 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 899 if (pcbddc->switch_static || pcbddc->dbg_flag) { 900 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 901 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 902 } 903 } 904 } 905 906 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 907 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 908 const PetscInt *idxs; 909 910 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 911 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 912 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 913 } 914 915 /* vertices */ 916 if (n_vertices) { 917 918 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 919 920 if (n_R) { 921 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 922 PetscBLASInt B_N,B_one = 1; 923 PetscScalar *x,*y; 924 PetscBool isseqaij; 925 926 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 927 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 928 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 929 if (F) { /* TODO could be optimized for symmetric problems */ 930 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 931 } else { 932 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 933 for (i=0;i<n_vertices;i++) { 934 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 935 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 936 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 937 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 938 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 939 } 940 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 941 } 942 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 943 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 944 if (n_constraints) { 945 Mat B; 946 947 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 948 for (i=0;i<n_vertices;i++) { 949 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 950 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 951 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 953 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 954 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 955 } 956 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 957 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 958 ierr = MatDestroy(&B);CHKERRQ(ierr); 959 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 960 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 961 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 962 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 963 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 964 ierr = MatDestroy(&B);CHKERRQ(ierr); 965 } 966 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 967 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 968 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 969 } 970 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 971 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 972 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 973 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 974 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 975 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 976 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 977 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 978 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 979 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 980 } else { 981 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 982 } 983 if (n_benign) { 984 const PetscScalar *vals; 985 const PetscInt *idxs; 986 PetscInt n,j; 987 988 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 989 for (j=0;j<n;j++) { 990 coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j]; 991 coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j]; 992 } 993 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 994 } 995 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 996 997 /* coarse basis functions */ 998 for (i=0;i<n_vertices;i++) { 999 PetscScalar *y; 1000 1001 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1002 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1003 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 y[n_B*i+idx_V_B[i]] = 1.0; 1007 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1008 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1009 1010 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1011 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1012 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1013 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1016 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1017 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1018 } 1019 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1020 } 1021 /* if n_R == 0 the object is not destroyed */ 1022 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1023 } 1024 1025 if (n_constraints) { 1026 Mat B; 1027 1028 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1029 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1030 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1031 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1032 if (n_vertices) { 1033 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1034 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1035 } else { 1036 Mat S_VCt; 1037 1038 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1039 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1040 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1041 } 1042 } 1043 ierr = MatDestroy(&B);CHKERRQ(ierr); 1044 /* coarse basis functions */ 1045 for (i=0;i<n_constraints;i++) { 1046 PetscScalar *y; 1047 1048 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1049 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1050 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1051 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1054 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1055 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1056 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1057 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1058 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1059 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1060 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1061 if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0; 1062 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1063 } 1064 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1065 } 1066 } 1067 if (n_constraints) { 1068 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1069 } 1070 1071 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1072 1073 /* compute other basis functions for non-symmetric problems */ 1074 if (!pcbddc->symmetric_primal) { 1075 1076 if (n_constraints) { 1077 Mat S_CCT,B_C; 1078 1079 /* this is a lazy thing */ 1080 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1081 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1082 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1083 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1084 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1085 if (n_vertices) { 1086 Mat B_V,S_VCT; 1087 1088 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1089 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1090 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1091 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1092 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1093 } 1094 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1095 } else { /* if there are no constraints, reset work */ 1096 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1097 } 1098 if (n_vertices && n_R) { 1099 Mat A_VRT; 1100 PetscScalar *marray; 1101 PetscBLASInt B_N,B_one = 1; 1102 1103 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1104 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1105 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1106 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1107 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1108 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1109 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1110 } 1111 1112 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1113 for (i=0;i<pcbddc->local_primal_size;i++) { 1114 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1115 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1116 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1117 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1118 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1119 } 1120 } else { 1121 for (i=0;i<pcbddc->local_primal_size;i++) { 1122 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1123 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1124 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1125 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1126 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1127 } 1128 } 1129 /* coarse basis functions */ 1130 for (i=0;i<pcbddc->local_primal_size;i++) { 1131 PetscScalar *y; 1132 1133 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1134 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1135 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1136 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1137 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138 if (i<n_vertices) { 1139 y[n_B*i+idx_V_B[i]] = 1.0; 1140 } 1141 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1142 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1143 1144 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1145 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1146 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1147 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1148 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1149 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1150 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1151 } 1152 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1153 } 1154 } 1155 /* free memory */ 1156 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1157 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1158 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1159 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1160 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1161 ierr = PetscFree(work);CHKERRQ(ierr); 1162 if (n_vertices) { 1163 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1164 } 1165 if (n_constraints) { 1166 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1167 } 1168 /* Checking coarse_sub_mat and coarse basis functios */ 1169 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1170 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1171 if (pcbddc->dbg_flag) { 1172 Mat coarse_sub_mat; 1173 Mat AUXMAT,TM1,TM2,TM3,TM4; 1174 Mat coarse_phi_D,coarse_phi_B; 1175 Mat coarse_psi_D,coarse_psi_B; 1176 Mat A_II,A_BB,A_IB,A_BI; 1177 Mat C_B,CPHI; 1178 IS is_dummy; 1179 Vec mones; 1180 MatType checkmattype=MATSEQAIJ; 1181 PetscReal real_value; 1182 1183 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1184 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1185 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1186 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1187 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1188 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1189 if (unsymmetric_check) { 1190 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1191 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1192 } 1193 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1194 1195 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1197 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1198 if (unsymmetric_check) { 1199 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1200 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1201 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1202 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1203 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1204 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1205 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1206 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1207 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1208 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1209 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1210 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1211 } else { 1212 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1213 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1214 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1215 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1216 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1217 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1218 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1219 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1220 } 1221 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1222 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1223 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1224 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1225 if (n_benign) { 1226 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1227 PetscScalar *data,*data2; 1228 1229 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1230 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1231 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1232 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1233 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1234 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1235 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1236 for (i=0;i<pcbddc->local_primal_size;i++) { 1237 data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i]; 1238 data[(i+1)*pcbddc->local_primal_size-1] += data2[i]; 1239 } 1240 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1241 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1242 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1243 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1244 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1245 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1246 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1247 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1248 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1249 } 1250 #if 0 1251 { 1252 PetscViewer viewer; 1253 char filename[256]; 1254 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1255 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1256 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1257 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1258 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1259 } 1260 #endif 1261 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1262 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1263 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1265 1266 /* check constraints */ 1267 if (!n_benign) { 1268 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1269 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1270 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1271 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1272 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1273 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1274 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1276 if (unsymmetric_check) { 1277 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1278 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1279 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1280 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1282 } 1283 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1284 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1285 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1286 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1287 } 1288 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1289 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1290 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1291 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1292 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1293 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1294 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1295 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1296 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1297 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1298 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1299 if (unsymmetric_check) { 1300 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1301 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1302 } 1303 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1304 } 1305 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1306 /* get back data */ 1307 *coarse_submat_vals_n = coarse_submat_vals; 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1313 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1314 { 1315 Mat *work_mat; 1316 IS isrow_s,iscol_s; 1317 PetscBool rsorted,csorted; 1318 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1319 PetscErrorCode ierr; 1320 1321 PetscFunctionBegin; 1322 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1323 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1324 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1325 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1326 1327 if (!rsorted) { 1328 const PetscInt *idxs; 1329 PetscInt *idxs_sorted,i; 1330 1331 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1332 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1333 for (i=0;i<rsize;i++) { 1334 idxs_perm_r[i] = i; 1335 } 1336 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1337 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1338 for (i=0;i<rsize;i++) { 1339 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1340 } 1341 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1342 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1343 } else { 1344 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1345 isrow_s = isrow; 1346 } 1347 1348 if (!csorted) { 1349 if (isrow == iscol) { 1350 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1351 iscol_s = isrow_s; 1352 } else { 1353 const PetscInt *idxs; 1354 PetscInt *idxs_sorted,i; 1355 1356 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1357 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1358 for (i=0;i<csize;i++) { 1359 idxs_perm_c[i] = i; 1360 } 1361 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1362 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1363 for (i=0;i<csize;i++) { 1364 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1365 } 1366 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1367 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1368 } 1369 } else { 1370 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1371 iscol_s = iscol; 1372 } 1373 1374 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1375 1376 if (!rsorted || !csorted) { 1377 Mat new_mat; 1378 IS is_perm_r,is_perm_c; 1379 1380 if (!rsorted) { 1381 PetscInt *idxs_r,i; 1382 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1383 for (i=0;i<rsize;i++) { 1384 idxs_r[idxs_perm_r[i]] = i; 1385 } 1386 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1387 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1388 } else { 1389 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1390 } 1391 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1392 1393 if (!csorted) { 1394 if (isrow_s == iscol_s) { 1395 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1396 is_perm_c = is_perm_r; 1397 } else { 1398 PetscInt *idxs_c,i; 1399 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1400 for (i=0;i<csize;i++) { 1401 idxs_c[idxs_perm_c[i]] = i; 1402 } 1403 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1404 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1405 } 1406 } else { 1407 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1408 } 1409 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1410 1411 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1412 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1413 work_mat[0] = new_mat; 1414 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1415 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1416 } 1417 1418 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1419 *B = work_mat[0]; 1420 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1421 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1422 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1428 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1429 { 1430 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1431 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1432 Mat new_mat; 1433 IS is_local,is_global; 1434 PetscInt local_size; 1435 PetscBool isseqaij; 1436 PetscErrorCode ierr; 1437 1438 PetscFunctionBegin; 1439 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1440 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1441 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1442 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1443 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1444 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1445 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1446 1447 /* check */ 1448 if (pcbddc->dbg_flag) { 1449 Vec x,x_change; 1450 PetscReal error; 1451 1452 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1453 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1454 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1455 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1456 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1458 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1459 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1460 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1461 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1462 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1464 ierr = VecDestroy(&x);CHKERRQ(ierr); 1465 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1466 } 1467 1468 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1469 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1470 if (isseqaij) { 1471 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1472 } else { 1473 Mat work_mat; 1474 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1475 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1476 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1477 } 1478 if (matis->A->symmetric_set) { 1479 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1480 #if !defined(PETSC_USE_COMPLEX) 1481 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1482 #endif 1483 } 1484 /* 1485 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1486 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1487 */ 1488 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1489 PetscFunctionReturn(0); 1490 } 1491 1492 #undef __FUNCT__ 1493 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1494 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1495 { 1496 PC_IS* pcis = (PC_IS*)(pc->data); 1497 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1498 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1499 PetscInt *idx_R_local=NULL; 1500 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1501 PetscInt vbs,bs; 1502 PetscBT bitmask=NULL; 1503 PetscErrorCode ierr; 1504 1505 PetscFunctionBegin; 1506 /* 1507 No need to setup local scatters if 1508 - primal space is unchanged 1509 AND 1510 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1511 AND 1512 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1513 */ 1514 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1515 PetscFunctionReturn(0); 1516 } 1517 /* destroy old objects */ 1518 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1519 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1520 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1521 /* Set Non-overlapping dimensions */ 1522 n_B = pcis->n_B; 1523 n_D = pcis->n - n_B; 1524 n_vertices = pcbddc->n_vertices; 1525 1526 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1527 1528 /* create auxiliary bitmask and allocate workspace */ 1529 if (!sub_schurs->reuse_mumps) { 1530 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1531 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1532 for (i=0;i<n_vertices;i++) { 1533 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1534 } 1535 1536 for (i=0, n_R=0; i<pcis->n; i++) { 1537 if (!PetscBTLookup(bitmask,i)) { 1538 idx_R_local[n_R++] = i; 1539 } 1540 } 1541 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1542 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1543 1544 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1545 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1546 } 1547 1548 /* Block code */ 1549 vbs = 1; 1550 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1551 if (bs>1 && !(n_vertices%bs)) { 1552 PetscBool is_blocked = PETSC_TRUE; 1553 PetscInt *vary; 1554 if (!sub_schurs->reuse_mumps) { 1555 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1556 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1557 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1558 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1559 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1560 for (i=0; i<pcis->n/bs; i++) { 1561 if (vary[i]!=0 && vary[i]!=bs) { 1562 is_blocked = PETSC_FALSE; 1563 break; 1564 } 1565 } 1566 ierr = PetscFree(vary);CHKERRQ(ierr); 1567 } else { 1568 /* Verify directly the R set */ 1569 for (i=0; i<n_R/bs; i++) { 1570 PetscInt j,node=idx_R_local[bs*i]; 1571 for (j=1; j<bs; j++) { 1572 if (node != idx_R_local[bs*i+j]-j) { 1573 is_blocked = PETSC_FALSE; 1574 break; 1575 } 1576 } 1577 } 1578 } 1579 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1580 vbs = bs; 1581 for (i=0;i<n_R/vbs;i++) { 1582 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1583 } 1584 } 1585 } 1586 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1587 if (sub_schurs->reuse_mumps) { 1588 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1589 1590 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1591 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1592 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1593 reuse_mumps->is_R = pcbddc->is_R_local; 1594 } else { 1595 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1596 } 1597 1598 /* print some info if requested */ 1599 if (pcbddc->dbg_flag) { 1600 PetscInt benign = 0; 1601 1602 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1603 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1604 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1605 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1606 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1609 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1610 } 1611 1612 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1613 if (!sub_schurs->reuse_mumps) { 1614 IS is_aux1,is_aux2; 1615 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1616 1617 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1618 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1619 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1620 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1621 for (i=0; i<n_D; i++) { 1622 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1623 } 1624 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1625 for (i=0, j=0; i<n_R; i++) { 1626 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1627 aux_array1[j++] = i; 1628 } 1629 } 1630 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1631 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1632 for (i=0, j=0; i<n_B; i++) { 1633 if (!PetscBTLookup(bitmask,is_indices[i])) { 1634 aux_array2[j++] = i; 1635 } 1636 } 1637 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1638 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1639 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1640 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1641 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1642 1643 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1644 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1645 for (i=0, j=0; i<n_R; i++) { 1646 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1647 aux_array1[j++] = i; 1648 } 1649 } 1650 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1651 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1652 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1653 } 1654 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1655 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1656 } else { 1657 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1658 IS tis; 1659 PetscInt schur_size; 1660 1661 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1662 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1663 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1664 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1665 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1666 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1667 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1668 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1669 } 1670 } 1671 PetscFunctionReturn(0); 1672 } 1673 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1677 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1678 { 1679 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1680 PC_IS *pcis = (PC_IS*)pc->data; 1681 PC pc_temp; 1682 Mat A_RR; 1683 MatReuse reuse; 1684 PetscScalar m_one = -1.0; 1685 PetscReal value; 1686 PetscInt n_D,n_R; 1687 PetscBool use_exact,use_exact_reduced,issbaij; 1688 PetscErrorCode ierr; 1689 /* prefixes stuff */ 1690 char dir_prefix[256],neu_prefix[256],str_level[16]; 1691 size_t len; 1692 1693 PetscFunctionBegin; 1694 1695 /* compute prefixes */ 1696 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1697 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1698 if (!pcbddc->current_level) { 1699 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1700 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1701 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1702 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1703 } else { 1704 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1705 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1706 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1707 len -= 15; /* remove "pc_bddc_coarse_" */ 1708 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1709 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1710 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1711 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1712 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1713 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1714 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1715 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1716 } 1717 1718 /* DIRICHLET PROBLEM */ 1719 if (dirichlet) { 1720 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1721 if (pcbddc->local_mat->symmetric_set) { 1722 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1723 } 1724 /* Matrix for Dirichlet problem is pcis->A_II */ 1725 n_D = pcis->n - pcis->n_B; 1726 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1727 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1728 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1729 /* default */ 1730 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1731 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1732 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1733 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1734 if (issbaij) { 1735 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1736 } else { 1737 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1738 } 1739 /* Allow user's customization */ 1740 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1741 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1742 } 1743 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1744 if (sub_schurs->reuse_mumps) { 1745 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1746 1747 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1748 } 1749 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1750 if (!n_D) { 1751 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1752 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1753 } 1754 /* Set Up KSP for Dirichlet problem of BDDC */ 1755 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1756 /* set ksp_D into pcis data */ 1757 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1758 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1759 pcis->ksp_D = pcbddc->ksp_D; 1760 } 1761 1762 /* NEUMANN PROBLEM */ 1763 A_RR = 0; 1764 if (neumann) { 1765 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1766 PetscInt ibs,mbs; 1767 PetscBool issbaij; 1768 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1769 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1770 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1771 if (pcbddc->ksp_R) { /* already created ksp */ 1772 PetscInt nn_R; 1773 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1774 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1775 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1776 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1777 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1778 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1779 reuse = MAT_INITIAL_MATRIX; 1780 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1781 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1782 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1783 reuse = MAT_INITIAL_MATRIX; 1784 } else { /* safe to reuse the matrix */ 1785 reuse = MAT_REUSE_MATRIX; 1786 } 1787 } 1788 /* last check */ 1789 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1790 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1791 reuse = MAT_INITIAL_MATRIX; 1792 } 1793 } else { /* first time, so we need to create the matrix */ 1794 reuse = MAT_INITIAL_MATRIX; 1795 } 1796 /* extract A_RR */ 1797 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1798 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1799 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1800 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1801 if (matis->A == pcbddc->local_mat) { 1802 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1803 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1804 } else { 1805 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1806 } 1807 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1808 if (matis->A == pcbddc->local_mat) { 1809 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1810 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1811 } else { 1812 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1813 } 1814 } 1815 if (!sub_schurs->reuse_mumps) { 1816 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1817 if (pcbddc->local_mat->symmetric_set) { 1818 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1819 } 1820 } else { 1821 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1822 1823 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1824 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1825 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1826 } 1827 if (!pcbddc->ksp_R) { /* create object if not present */ 1828 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1829 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1830 /* default */ 1831 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1832 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1833 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1834 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1835 if (issbaij) { 1836 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1837 } else { 1838 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1839 } 1840 /* Allow user's customization */ 1841 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1842 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1843 } 1844 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1845 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1846 if (!n_R) { 1847 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1848 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1849 } 1850 /* Reuse MUMPS solver if it is present */ 1851 if (sub_schurs->reuse_mumps) { 1852 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1853 1854 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1855 } 1856 /* Set Up KSP for Neumann problem of BDDC */ 1857 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1858 } 1859 /* free Neumann problem's matrix */ 1860 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1861 1862 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1863 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1864 if (pcbddc->dbg_flag) { 1865 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1866 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1867 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1868 } 1869 if (dirichlet) { /* Dirichlet */ 1870 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1871 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1872 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1873 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1874 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1875 /* need to be adapted? */ 1876 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1877 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1878 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1879 /* print info */ 1880 if (pcbddc->dbg_flag) { 1881 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1883 } 1884 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1885 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1886 } 1887 } 1888 if (neumann) { /* Neumann */ 1889 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1890 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1891 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1892 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1893 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1894 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1895 /* need to be adapted? */ 1896 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1897 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1898 /* print info */ 1899 if (pcbddc->dbg_flag) { 1900 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1901 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1902 } 1903 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1904 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1905 } 1906 } 1907 } 1908 PetscFunctionReturn(0); 1909 } 1910 1911 #undef __FUNCT__ 1912 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1913 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1914 { 1915 PetscErrorCode ierr; 1916 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1917 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1918 1919 PetscFunctionBegin; 1920 if (!sub_schurs->reuse_mumps) { 1921 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1922 } 1923 if (!pcbddc->switch_static) { 1924 if (applytranspose && pcbddc->local_auxmat1) { 1925 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1926 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1927 } 1928 if (!sub_schurs->reuse_mumps) { 1929 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1930 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1931 } else { 1932 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1933 1934 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1935 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1936 } 1937 } else { 1938 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1939 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1940 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1941 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1942 if (applytranspose && pcbddc->local_auxmat1) { 1943 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1944 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1945 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1946 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1947 } 1948 } 1949 if (!sub_schurs->reuse_mumps) { 1950 if (applytranspose) { 1951 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1952 } else { 1953 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1954 } 1955 #if defined(PETSC_HAVE_MUMPS) 1956 } else { 1957 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1958 1959 if (applytranspose) { 1960 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1961 } else { 1962 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1963 } 1964 #endif 1965 } 1966 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1967 if (!pcbddc->switch_static) { 1968 if (!sub_schurs->reuse_mumps) { 1969 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1970 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1971 } else { 1972 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1973 1974 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1975 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1976 } 1977 if (!applytranspose && pcbddc->local_auxmat1) { 1978 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1979 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1980 } 1981 } else { 1982 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1983 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1984 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1985 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1986 if (!applytranspose && pcbddc->local_auxmat1) { 1987 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1988 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1989 } 1990 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1991 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1992 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1993 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2001 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2002 { 2003 PetscErrorCode ierr; 2004 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2005 PC_IS* pcis = (PC_IS*) (pc->data); 2006 const PetscScalar zero = 0.0; 2007 2008 PetscFunctionBegin; 2009 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2010 if (applytranspose) { 2011 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2012 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2013 } else { 2014 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2015 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2016 } 2017 2018 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2019 if (pcbddc->benign_p0_lidx >= 0) { 2020 PetscScalar *array; 2021 2022 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2023 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2024 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2025 } 2026 2027 /* start communications from local primal nodes to rhs of coarse solver */ 2028 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2029 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2030 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2031 2032 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2033 /* TODO remove null space when doing multilevel */ 2034 if (pcbddc->coarse_ksp) { 2035 Vec rhs,sol; 2036 2037 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2038 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2039 if (applytranspose) { 2040 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2041 } else { 2042 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2043 } 2044 } 2045 2046 /* Local solution on R nodes */ 2047 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2048 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2049 } 2050 2051 /* communications from coarse sol to local primal nodes */ 2052 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2053 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2054 2055 /* Sum contributions from two levels */ 2056 if (applytranspose) { 2057 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2058 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2059 } else { 2060 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2061 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2062 } 2063 /* store p0 */ 2064 if (pcbddc->benign_p0_lidx >= 0) { 2065 PetscScalar *array; 2066 2067 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2068 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2069 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2076 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2077 { 2078 PetscErrorCode ierr; 2079 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2080 PetscScalar *array; 2081 Vec from,to; 2082 2083 PetscFunctionBegin; 2084 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2085 from = pcbddc->coarse_vec; 2086 to = pcbddc->vec1_P; 2087 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2088 Vec tvec; 2089 2090 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2091 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2092 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2093 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2094 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2095 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2096 } 2097 } else { /* from local to global -> put data in coarse right hand side */ 2098 from = pcbddc->vec1_P; 2099 to = pcbddc->coarse_vec; 2100 } 2101 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2107 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2108 { 2109 PetscErrorCode ierr; 2110 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2111 PetscScalar *array; 2112 Vec from,to; 2113 2114 PetscFunctionBegin; 2115 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2116 from = pcbddc->coarse_vec; 2117 to = pcbddc->vec1_P; 2118 } else { /* from local to global -> put data in coarse right hand side */ 2119 from = pcbddc->vec1_P; 2120 to = pcbddc->coarse_vec; 2121 } 2122 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2123 if (smode == SCATTER_FORWARD) { 2124 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2125 Vec tvec; 2126 2127 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2128 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2129 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2130 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2131 } 2132 } else { 2133 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2134 ierr = VecResetArray(from);CHKERRQ(ierr); 2135 } 2136 } 2137 PetscFunctionReturn(0); 2138 } 2139 2140 /* uncomment for testing purposes */ 2141 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2144 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2145 { 2146 PetscErrorCode ierr; 2147 PC_IS* pcis = (PC_IS*)(pc->data); 2148 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2149 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2150 /* one and zero */ 2151 PetscScalar one=1.0,zero=0.0; 2152 /* space to store constraints and their local indices */ 2153 PetscScalar *constraints_data; 2154 PetscInt *constraints_idxs,*constraints_idxs_B; 2155 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2156 PetscInt *constraints_n; 2157 /* iterators */ 2158 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2159 /* BLAS integers */ 2160 PetscBLASInt lwork,lierr; 2161 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2162 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2163 /* reuse */ 2164 PetscInt olocal_primal_size,olocal_primal_size_cc; 2165 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2166 /* change of basis */ 2167 PetscBool qr_needed; 2168 PetscBT change_basis,qr_needed_idx; 2169 /* auxiliary stuff */ 2170 PetscInt *nnz,*is_indices; 2171 PetscInt ncc; 2172 /* some quantities */ 2173 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2174 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2175 2176 PetscFunctionBegin; 2177 /* Destroy Mat objects computed previously */ 2178 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2179 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2180 /* save info on constraints from previous setup (if any) */ 2181 olocal_primal_size = pcbddc->local_primal_size; 2182 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2183 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2184 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2185 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2186 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2187 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2188 2189 /* print some info */ 2190 if (pcbddc->dbg_flag) { 2191 IS vertices; 2192 PetscInt nv,nedges,nfaces; 2193 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2194 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2195 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2196 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2197 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2198 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2199 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2200 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2201 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2202 } 2203 2204 if (!pcbddc->adaptive_selection) { 2205 IS ISForVertices,*ISForFaces,*ISForEdges; 2206 MatNullSpace nearnullsp; 2207 const Vec *nearnullvecs; 2208 Vec *localnearnullsp; 2209 PetscScalar *array; 2210 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2211 PetscBool nnsp_has_cnst; 2212 /* LAPACK working arrays for SVD or POD */ 2213 PetscBool skip_lapack,boolforchange; 2214 PetscScalar *work; 2215 PetscReal *singular_vals; 2216 #if defined(PETSC_USE_COMPLEX) 2217 PetscReal *rwork; 2218 #endif 2219 #if defined(PETSC_MISSING_LAPACK_GESVD) 2220 PetscScalar *temp_basis,*correlation_mat; 2221 #else 2222 PetscBLASInt dummy_int=1; 2223 PetscScalar dummy_scalar=1.; 2224 #endif 2225 2226 /* Get index sets for faces, edges and vertices from graph */ 2227 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2228 /* free unneeded index sets */ 2229 if (!pcbddc->use_vertices) { 2230 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2231 } 2232 if (!pcbddc->use_edges) { 2233 for (i=0;i<n_ISForEdges;i++) { 2234 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2235 } 2236 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2237 n_ISForEdges = 0; 2238 } 2239 if (!pcbddc->use_faces) { 2240 for (i=0;i<n_ISForFaces;i++) { 2241 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2242 } 2243 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2244 n_ISForFaces = 0; 2245 } 2246 2247 #if defined(PETSC_USE_DEBUG) 2248 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2249 Also use_change_of_basis should be consistent among processors */ 2250 if (pcbddc->NullSpace) { 2251 PetscBool tbool[2],gbool[2]; 2252 2253 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2254 pcbddc->use_change_of_basis = PETSC_TRUE; 2255 if (!ISForEdges) { 2256 pcbddc->use_change_on_faces = PETSC_TRUE; 2257 } 2258 } 2259 tbool[0] = pcbddc->use_change_of_basis; 2260 tbool[1] = pcbddc->use_change_on_faces; 2261 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2262 pcbddc->use_change_of_basis = gbool[0]; 2263 pcbddc->use_change_on_faces = gbool[1]; 2264 } 2265 #endif 2266 2267 /* check if near null space is attached to global mat */ 2268 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2269 if (nearnullsp) { 2270 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2271 /* remove any stored info */ 2272 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2273 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2274 /* store information for BDDC solver reuse */ 2275 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2276 pcbddc->onearnullspace = nearnullsp; 2277 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2278 for (i=0;i<nnsp_size;i++) { 2279 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2280 } 2281 } else { /* if near null space is not provided BDDC uses constants by default */ 2282 nnsp_size = 0; 2283 nnsp_has_cnst = PETSC_TRUE; 2284 } 2285 /* get max number of constraints on a single cc */ 2286 max_constraints = nnsp_size; 2287 if (nnsp_has_cnst) max_constraints++; 2288 2289 /* 2290 Evaluate maximum storage size needed by the procedure 2291 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2292 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2293 There can be multiple constraints per connected component 2294 */ 2295 n_vertices = 0; 2296 if (ISForVertices) { 2297 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2298 } 2299 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2300 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2301 2302 total_counts = n_ISForFaces+n_ISForEdges; 2303 total_counts *= max_constraints; 2304 total_counts += n_vertices; 2305 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2306 2307 total_counts = 0; 2308 max_size_of_constraint = 0; 2309 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2310 IS used_is; 2311 if (i<n_ISForEdges) { 2312 used_is = ISForEdges[i]; 2313 } else { 2314 used_is = ISForFaces[i-n_ISForEdges]; 2315 } 2316 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2317 total_counts += j; 2318 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2319 } 2320 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2321 2322 /* get local part of global near null space vectors */ 2323 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2324 for (k=0;k<nnsp_size;k++) { 2325 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2326 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2327 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2328 } 2329 2330 /* whether or not to skip lapack calls */ 2331 skip_lapack = PETSC_TRUE; 2332 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2333 2334 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2335 if (!skip_lapack) { 2336 PetscScalar temp_work; 2337 2338 #if defined(PETSC_MISSING_LAPACK_GESVD) 2339 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2340 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2341 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2342 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2343 #if defined(PETSC_USE_COMPLEX) 2344 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2345 #endif 2346 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2347 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2348 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2349 lwork = -1; 2350 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2351 #if !defined(PETSC_USE_COMPLEX) 2352 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2353 #else 2354 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2355 #endif 2356 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2357 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2358 #else /* on missing GESVD */ 2359 /* SVD */ 2360 PetscInt max_n,min_n; 2361 max_n = max_size_of_constraint; 2362 min_n = max_constraints; 2363 if (max_size_of_constraint < max_constraints) { 2364 min_n = max_size_of_constraint; 2365 max_n = max_constraints; 2366 } 2367 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2368 #if defined(PETSC_USE_COMPLEX) 2369 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2370 #endif 2371 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2372 lwork = -1; 2373 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2374 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2375 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2376 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2377 #if !defined(PETSC_USE_COMPLEX) 2378 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2379 #else 2380 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2381 #endif 2382 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2383 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2384 #endif /* on missing GESVD */ 2385 /* Allocate optimal workspace */ 2386 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2387 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2388 } 2389 /* Now we can loop on constraining sets */ 2390 total_counts = 0; 2391 constraints_idxs_ptr[0] = 0; 2392 constraints_data_ptr[0] = 0; 2393 /* vertices */ 2394 if (n_vertices) { 2395 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2396 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2397 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2398 for (i=0;i<n_vertices;i++) { 2399 constraints_n[total_counts] = 1; 2400 constraints_data[total_counts] = 1.0; 2401 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2402 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2403 total_counts++; 2404 } 2405 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2406 PetscBool used_vertex; 2407 for (i=0;i<n_vertices;i++) { 2408 used_vertex = PETSC_FALSE; 2409 k = 0; 2410 while (!used_vertex && k<nnsp_size) { 2411 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2412 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2413 constraints_n[total_counts] = 1; 2414 constraints_idxs[total_counts] = is_indices[i]; 2415 constraints_data[total_counts] = 1.0; 2416 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2417 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2418 total_counts++; 2419 used_vertex = PETSC_TRUE; 2420 } 2421 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2422 k++; 2423 } 2424 } 2425 } 2426 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2427 n_vertices = total_counts; 2428 } 2429 2430 /* edges and faces */ 2431 total_counts_cc = total_counts; 2432 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2433 IS used_is; 2434 PetscBool idxs_copied = PETSC_FALSE; 2435 2436 if (ncc<n_ISForEdges) { 2437 used_is = ISForEdges[ncc]; 2438 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2439 } else { 2440 used_is = ISForFaces[ncc-n_ISForEdges]; 2441 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2442 } 2443 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2444 2445 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2446 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2447 /* change of basis should not be performed on local periodic nodes */ 2448 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2449 if (nnsp_has_cnst) { 2450 PetscScalar quad_value; 2451 2452 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2453 idxs_copied = PETSC_TRUE; 2454 2455 if (!pcbddc->use_nnsp_true) { 2456 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2457 } else { 2458 quad_value = 1.0; 2459 } 2460 for (j=0;j<size_of_constraint;j++) { 2461 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2462 } 2463 temp_constraints++; 2464 total_counts++; 2465 } 2466 for (k=0;k<nnsp_size;k++) { 2467 PetscReal real_value; 2468 PetscScalar *ptr_to_data; 2469 2470 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2471 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2472 for (j=0;j<size_of_constraint;j++) { 2473 ptr_to_data[j] = array[is_indices[j]]; 2474 } 2475 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2476 /* check if array is null on the connected component */ 2477 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2478 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2479 if (real_value > 0.0) { /* keep indices and values */ 2480 temp_constraints++; 2481 total_counts++; 2482 if (!idxs_copied) { 2483 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2484 idxs_copied = PETSC_TRUE; 2485 } 2486 } 2487 } 2488 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2489 valid_constraints = temp_constraints; 2490 if (!pcbddc->use_nnsp_true && temp_constraints) { 2491 if (temp_constraints == 1) { /* just normalize the constraint */ 2492 PetscScalar norm,*ptr_to_data; 2493 2494 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2495 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2496 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2497 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2498 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2499 } else { /* perform SVD */ 2500 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2501 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2502 2503 #if defined(PETSC_MISSING_LAPACK_GESVD) 2504 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2505 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2506 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2507 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2508 from that computed using LAPACKgesvd 2509 -> This is due to a different computation of eigenvectors in LAPACKheev 2510 -> The quality of the POD-computed basis will be the same */ 2511 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2512 /* Store upper triangular part of correlation matrix */ 2513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2514 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2515 for (j=0;j<temp_constraints;j++) { 2516 for (k=0;k<j+1;k++) { 2517 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2518 } 2519 } 2520 /* compute eigenvalues and eigenvectors of correlation matrix */ 2521 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2522 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2523 #if !defined(PETSC_USE_COMPLEX) 2524 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2525 #else 2526 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2527 #endif 2528 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2529 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2530 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2531 j = 0; 2532 while (j < temp_constraints && singular_vals[j] < tol) j++; 2533 total_counts = total_counts-j; 2534 valid_constraints = temp_constraints-j; 2535 /* scale and copy POD basis into used quadrature memory */ 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2538 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2539 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2540 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2541 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2542 if (j<temp_constraints) { 2543 PetscInt ii; 2544 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2545 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2546 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2547 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2548 for (k=0;k<temp_constraints-j;k++) { 2549 for (ii=0;ii<size_of_constraint;ii++) { 2550 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2551 } 2552 } 2553 } 2554 #else /* on missing GESVD */ 2555 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2556 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2557 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2558 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2559 #if !defined(PETSC_USE_COMPLEX) 2560 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2561 #else 2562 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2563 #endif 2564 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2565 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2566 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2567 k = temp_constraints; 2568 if (k > size_of_constraint) k = size_of_constraint; 2569 j = 0; 2570 while (j < k && singular_vals[k-j-1] < tol) j++; 2571 valid_constraints = k-j; 2572 total_counts = total_counts-temp_constraints+valid_constraints; 2573 #endif /* on missing GESVD */ 2574 } 2575 } 2576 /* update pointers information */ 2577 if (valid_constraints) { 2578 constraints_n[total_counts_cc] = valid_constraints; 2579 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2580 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2581 /* set change_of_basis flag */ 2582 if (boolforchange) { 2583 PetscBTSet(change_basis,total_counts_cc); 2584 } 2585 total_counts_cc++; 2586 } 2587 } 2588 /* free workspace */ 2589 if (!skip_lapack) { 2590 ierr = PetscFree(work);CHKERRQ(ierr); 2591 #if defined(PETSC_USE_COMPLEX) 2592 ierr = PetscFree(rwork);CHKERRQ(ierr); 2593 #endif 2594 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2595 #if defined(PETSC_MISSING_LAPACK_GESVD) 2596 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2597 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2598 #endif 2599 } 2600 for (k=0;k<nnsp_size;k++) { 2601 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2602 } 2603 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2604 /* free index sets of faces, edges and vertices */ 2605 for (i=0;i<n_ISForFaces;i++) { 2606 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2607 } 2608 if (n_ISForFaces) { 2609 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2610 } 2611 for (i=0;i<n_ISForEdges;i++) { 2612 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2613 } 2614 if (n_ISForEdges) { 2615 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2616 } 2617 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2618 } else { 2619 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2620 2621 total_counts = 0; 2622 n_vertices = 0; 2623 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2624 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2625 } 2626 max_constraints = 0; 2627 total_counts_cc = 0; 2628 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2629 total_counts += pcbddc->adaptive_constraints_n[i]; 2630 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2631 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2632 } 2633 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2634 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2635 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2636 constraints_data = pcbddc->adaptive_constraints_data; 2637 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2638 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2639 total_counts_cc = 0; 2640 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2641 if (pcbddc->adaptive_constraints_n[i]) { 2642 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2643 } 2644 } 2645 #if 0 2646 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2647 for (i=0;i<total_counts_cc;i++) { 2648 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2649 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2650 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2651 printf(" %d",constraints_idxs[j]); 2652 } 2653 printf("\n"); 2654 printf("number of cc: %d\n",constraints_n[i]); 2655 } 2656 for (i=0;i<n_vertices;i++) { 2657 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2658 } 2659 for (i=0;i<sub_schurs->n_subs;i++) { 2660 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2661 } 2662 #endif 2663 2664 max_size_of_constraint = 0; 2665 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2666 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2667 /* Change of basis */ 2668 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2669 if (pcbddc->use_change_of_basis) { 2670 for (i=0;i<sub_schurs->n_subs;i++) { 2671 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2672 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2673 } 2674 } 2675 } 2676 } 2677 pcbddc->local_primal_size = total_counts; 2678 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2679 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2680 2681 /* map constraints_idxs in boundary numbering */ 2682 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2683 if (i != constraints_idxs_ptr[total_counts_cc]) { 2684 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2685 } 2686 2687 /* Create constraint matrix */ 2688 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2689 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2690 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2691 2692 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2693 /* determine if a QR strategy is needed for change of basis */ 2694 qr_needed = PETSC_FALSE; 2695 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2696 total_primal_vertices=0; 2697 pcbddc->local_primal_size_cc = 0; 2698 for (i=0;i<total_counts_cc;i++) { 2699 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2700 if (size_of_constraint == 1) { 2701 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2702 pcbddc->local_primal_size_cc += 1; 2703 } else if (PetscBTLookup(change_basis,i)) { 2704 for (k=0;k<constraints_n[i];k++) { 2705 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2706 } 2707 pcbddc->local_primal_size_cc += constraints_n[i]; 2708 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2709 PetscBTSet(qr_needed_idx,i); 2710 qr_needed = PETSC_TRUE; 2711 } 2712 } else { 2713 pcbddc->local_primal_size_cc += 1; 2714 } 2715 } 2716 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2717 pcbddc->n_vertices = total_primal_vertices; 2718 /* permute indices in order to have a sorted set of vertices */ 2719 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2720 2721 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2722 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2723 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2724 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2725 2726 /* nonzero structure of constraint matrix */ 2727 /* and get reference dof for local constraints */ 2728 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2729 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2730 2731 j = total_primal_vertices; 2732 total_counts = total_primal_vertices; 2733 cum = total_primal_vertices; 2734 for (i=n_vertices;i<total_counts_cc;i++) { 2735 if (!PetscBTLookup(change_basis,i)) { 2736 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2737 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2738 cum++; 2739 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2740 for (k=0;k<constraints_n[i];k++) { 2741 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2742 nnz[j+k] = size_of_constraint; 2743 } 2744 j += constraints_n[i]; 2745 } 2746 } 2747 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2748 ierr = PetscFree(nnz);CHKERRQ(ierr); 2749 2750 /* set values in constraint matrix */ 2751 for (i=0;i<total_primal_vertices;i++) { 2752 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2753 } 2754 total_counts = total_primal_vertices; 2755 for (i=n_vertices;i<total_counts_cc;i++) { 2756 if (!PetscBTLookup(change_basis,i)) { 2757 PetscInt *cols; 2758 2759 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2760 cols = constraints_idxs+constraints_idxs_ptr[i]; 2761 for (k=0;k<constraints_n[i];k++) { 2762 PetscInt row = total_counts+k; 2763 PetscScalar *vals; 2764 2765 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2766 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2767 } 2768 total_counts += constraints_n[i]; 2769 } 2770 } 2771 /* assembling */ 2772 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2773 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2774 2775 /* 2776 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2777 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2778 */ 2779 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2780 if (pcbddc->use_change_of_basis) { 2781 /* dual and primal dofs on a single cc */ 2782 PetscInt dual_dofs,primal_dofs; 2783 /* working stuff for GEQRF */ 2784 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2785 PetscBLASInt lqr_work; 2786 /* working stuff for UNGQR */ 2787 PetscScalar *gqr_work,lgqr_work_t; 2788 PetscBLASInt lgqr_work; 2789 /* working stuff for TRTRS */ 2790 PetscScalar *trs_rhs; 2791 PetscBLASInt Blas_NRHS; 2792 /* pointers for values insertion into change of basis matrix */ 2793 PetscInt *start_rows,*start_cols; 2794 PetscScalar *start_vals; 2795 /* working stuff for values insertion */ 2796 PetscBT is_primal; 2797 PetscInt *aux_primal_numbering_B; 2798 /* matrix sizes */ 2799 PetscInt global_size,local_size; 2800 /* temporary change of basis */ 2801 Mat localChangeOfBasisMatrix; 2802 /* extra space for debugging */ 2803 PetscScalar *dbg_work; 2804 2805 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2806 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2807 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2808 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2809 /* nonzeros for local mat */ 2810 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2811 for (i=0;i<pcis->n;i++) nnz[i]=1; 2812 for (i=n_vertices;i<total_counts_cc;i++) { 2813 if (PetscBTLookup(change_basis,i)) { 2814 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2815 if (PetscBTLookup(qr_needed_idx,i)) { 2816 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2817 } else { 2818 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2819 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2820 } 2821 } 2822 } 2823 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2824 ierr = PetscFree(nnz);CHKERRQ(ierr); 2825 /* Set initial identity in the matrix */ 2826 for (i=0;i<pcis->n;i++) { 2827 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2828 } 2829 2830 if (pcbddc->dbg_flag) { 2831 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2832 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2833 } 2834 2835 2836 /* Now we loop on the constraints which need a change of basis */ 2837 /* 2838 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2839 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2840 2841 Basic blocks of change of basis matrix T computed by 2842 2843 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2844 2845 | 1 0 ... 0 s_1/S | 2846 | 0 1 ... 0 s_2/S | 2847 | ... | 2848 | 0 ... 1 s_{n-1}/S | 2849 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2850 2851 with S = \sum_{i=1}^n s_i^2 2852 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2853 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2854 2855 - QR decomposition of constraints otherwise 2856 */ 2857 if (qr_needed) { 2858 /* space to store Q */ 2859 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2860 /* first we issue queries for optimal work */ 2861 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2862 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2863 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2864 lqr_work = -1; 2865 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2866 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2867 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2868 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2869 lgqr_work = -1; 2870 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2871 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2872 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2873 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2874 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2875 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2876 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2877 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2878 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2879 /* array to store scaling factors for reflectors */ 2880 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2881 /* array to store rhs and solution of triangular solver */ 2882 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2883 /* allocating workspace for check */ 2884 if (pcbddc->dbg_flag) { 2885 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2886 } 2887 } 2888 /* array to store whether a node is primal or not */ 2889 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2890 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2891 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2892 if (i != total_primal_vertices) { 2893 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2894 } 2895 for (i=0;i<total_primal_vertices;i++) { 2896 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2897 } 2898 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2899 2900 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2901 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2902 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2903 if (PetscBTLookup(change_basis,total_counts)) { 2904 /* get constraint info */ 2905 primal_dofs = constraints_n[total_counts]; 2906 dual_dofs = size_of_constraint-primal_dofs; 2907 2908 if (pcbddc->dbg_flag) { 2909 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2910 } 2911 2912 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2913 2914 /* copy quadrature constraints for change of basis check */ 2915 if (pcbddc->dbg_flag) { 2916 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2917 } 2918 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2919 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2920 2921 /* compute QR decomposition of constraints */ 2922 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2923 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2924 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2925 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2926 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2927 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2928 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2929 2930 /* explictly compute R^-T */ 2931 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2932 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2933 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2934 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2935 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2936 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2937 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2938 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2939 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2940 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2941 2942 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2943 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2944 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2945 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2946 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2947 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2948 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2949 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2950 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2951 2952 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2953 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2954 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2955 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2956 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2957 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2959 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2960 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2961 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2962 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2963 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2964 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2965 2966 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2967 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2968 /* insert cols for primal dofs */ 2969 for (j=0;j<primal_dofs;j++) { 2970 start_vals = &qr_basis[j*size_of_constraint]; 2971 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2972 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2973 } 2974 /* insert cols for dual dofs */ 2975 for (j=0,k=0;j<dual_dofs;k++) { 2976 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2977 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2978 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2979 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2980 j++; 2981 } 2982 } 2983 2984 /* check change of basis */ 2985 if (pcbddc->dbg_flag) { 2986 PetscInt ii,jj; 2987 PetscBool valid_qr=PETSC_TRUE; 2988 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2989 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2990 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2991 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2993 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2994 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2995 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2996 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2997 for (jj=0;jj<size_of_constraint;jj++) { 2998 for (ii=0;ii<primal_dofs;ii++) { 2999 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3000 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3001 } 3002 } 3003 if (!valid_qr) { 3004 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3005 for (jj=0;jj<size_of_constraint;jj++) { 3006 for (ii=0;ii<primal_dofs;ii++) { 3007 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3008 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3009 } 3010 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3011 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3012 } 3013 } 3014 } 3015 } else { 3016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3017 } 3018 } 3019 } else { /* simple transformation block */ 3020 PetscInt row,col; 3021 PetscScalar val,norm; 3022 3023 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3024 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3025 for (j=0;j<size_of_constraint;j++) { 3026 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3027 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3028 if (!PetscBTLookup(is_primal,row_B)) { 3029 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3030 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3031 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3032 } else { 3033 for (k=0;k<size_of_constraint;k++) { 3034 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3035 if (row != col) { 3036 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3037 } else { 3038 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3039 } 3040 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3041 } 3042 } 3043 } 3044 if (pcbddc->dbg_flag) { 3045 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3046 } 3047 } 3048 } else { 3049 if (pcbddc->dbg_flag) { 3050 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3051 } 3052 } 3053 } 3054 3055 /* free workspace */ 3056 if (qr_needed) { 3057 if (pcbddc->dbg_flag) { 3058 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3059 } 3060 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3061 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3062 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3063 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3064 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3065 } 3066 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3067 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3069 3070 /* assembling of global change of variable */ 3071 { 3072 Mat tmat; 3073 PetscInt bs; 3074 3075 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3076 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3077 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3078 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3079 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3080 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3081 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3082 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3083 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3084 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3085 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3086 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3087 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3088 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3089 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3090 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3091 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3092 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3093 } 3094 /* check */ 3095 if (pcbddc->dbg_flag) { 3096 PetscReal error; 3097 Vec x,x_change; 3098 3099 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3100 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3101 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3102 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3103 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3106 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3107 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3108 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3109 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3110 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3112 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3113 ierr = VecDestroy(&x);CHKERRQ(ierr); 3114 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3115 } 3116 3117 /* adapt sub_schurs computed (if any) */ 3118 if (pcbddc->use_deluxe_scaling) { 3119 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3120 if (sub_schurs->S_Ej_all) { 3121 Mat S_new,tmat; 3122 ISLocalToGlobalMapping NtoSall; 3123 IS is_all_N,is_V,is_V_Sall; 3124 const PetscScalar *array; 3125 const PetscInt *idxs_V,*idxs_all; 3126 PetscInt i,n_V; 3127 3128 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3129 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3130 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3131 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3132 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3133 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3134 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3135 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3136 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3137 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3138 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3139 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3140 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3141 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3142 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3143 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3144 for (i=0;i<n_V;i++) { 3145 PetscScalar val; 3146 PetscInt idx; 3147 3148 idx = idxs_V[i]; 3149 val = array[idxs_all[idxs_V[i]]]; 3150 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3151 } 3152 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3154 sub_schurs->S_Ej_all = S_new; 3155 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3156 if (sub_schurs->sum_S_Ej_all) { 3157 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3158 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3159 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3160 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3161 sub_schurs->sum_S_Ej_all = S_new; 3162 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3163 } 3164 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3165 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3166 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3167 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3168 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3169 } 3170 } 3171 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3172 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3173 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3174 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3175 } 3176 3177 /* set up change of basis context */ 3178 if (pcbddc->ChangeOfBasisMatrix) { 3179 PCBDDCChange_ctx change_ctx; 3180 3181 if (!pcbddc->new_global_mat) { 3182 PetscInt global_size,local_size; 3183 3184 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3185 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3186 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3187 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3188 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3189 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3190 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3191 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3192 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3193 } else { 3194 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3195 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3196 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3197 } 3198 if (!pcbddc->user_ChangeOfBasisMatrix) { 3199 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3200 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3201 } else { 3202 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3203 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3204 } 3205 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3206 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3207 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3208 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3209 } 3210 3211 /* add pressure dof to set of primal nodes for numbering purposes */ 3212 if (pcbddc->benign_p0_lidx >= 0) { 3213 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3214 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx; 3215 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3216 pcbddc->local_primal_size_cc++; 3217 pcbddc->local_primal_size++; 3218 } 3219 3220 /* check if a new primal space has been introduced (also take into account benign trick) */ 3221 pcbddc->new_primal_space_local = PETSC_TRUE; 3222 if (olocal_primal_size == pcbddc->local_primal_size) { 3223 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3224 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3225 if (!pcbddc->new_primal_space_local) { 3226 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3227 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3228 } 3229 } 3230 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3231 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3232 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3233 3234 /* flush dbg viewer */ 3235 if (pcbddc->dbg_flag) { 3236 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3237 } 3238 3239 /* free workspace */ 3240 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3241 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3242 if (!pcbddc->adaptive_selection) { 3243 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3244 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3245 } else { 3246 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3247 pcbddc->adaptive_constraints_idxs_ptr, 3248 pcbddc->adaptive_constraints_data_ptr, 3249 pcbddc->adaptive_constraints_idxs, 3250 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3251 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3252 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3253 } 3254 PetscFunctionReturn(0); 3255 } 3256 3257 #undef __FUNCT__ 3258 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3259 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3260 { 3261 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3262 PC_IS *pcis = (PC_IS*)pc->data; 3263 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3264 PetscInt ierr,i,vertex_size,N; 3265 PetscViewer viewer=pcbddc->dbg_viewer; 3266 3267 PetscFunctionBegin; 3268 /* Reset previously computed graph */ 3269 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3270 /* Init local Graph struct */ 3271 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3272 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3273 3274 /* Check validity of the csr graph passed in by the user */ 3275 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3276 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3277 } 3278 3279 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3280 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3281 PetscInt *xadj,*adjncy; 3282 PetscInt nvtxs; 3283 PetscBool flg_row=PETSC_FALSE; 3284 3285 if (pcbddc->use_local_adj) { 3286 3287 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3288 if (flg_row) { 3289 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3290 pcbddc->computed_rowadj = PETSC_TRUE; 3291 } 3292 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3293 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3294 IS is_dummy; 3295 ISLocalToGlobalMapping l2gmap_dummy; 3296 PetscInt j,sum; 3297 PetscInt *cxadj,*cadjncy; 3298 const PetscInt *idxs; 3299 PCBDDCGraph graph; 3300 PetscBT is_on_boundary; 3301 3302 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3303 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3304 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3305 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3306 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3307 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3308 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3309 if (flg_row) { 3310 graph->xadj = xadj; 3311 graph->adjncy = adjncy; 3312 } 3313 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3314 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3315 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3316 3317 if (pcbddc->dbg_flag) { 3318 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3319 for (i=0;i<graph->ncc;i++) { 3320 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3321 } 3322 } 3323 3324 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3325 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3326 for (i=0;i<pcis->n_B;i++) { 3327 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3328 } 3329 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3330 3331 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3332 sum = 0; 3333 for (i=0;i<graph->ncc;i++) { 3334 PetscInt sizecc = 0; 3335 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3336 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3337 sizecc++; 3338 } 3339 } 3340 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3341 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3342 cxadj[graph->queue[j]] = sizecc; 3343 } 3344 } 3345 sum += sizecc*sizecc; 3346 } 3347 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3348 sum = 0; 3349 for (i=0;i<pcis->n;i++) { 3350 PetscInt temp = cxadj[i]; 3351 cxadj[i] = sum; 3352 sum += temp; 3353 } 3354 cxadj[pcis->n] = sum; 3355 for (i=0;i<graph->ncc;i++) { 3356 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3357 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3358 PetscInt k,sizecc = 0; 3359 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3360 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3361 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3362 sizecc++; 3363 } 3364 } 3365 } 3366 } 3367 } 3368 if (sum) { 3369 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3370 } else { 3371 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3372 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3373 } 3374 graph->xadj = 0; 3375 graph->adjncy = 0; 3376 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3377 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3378 } 3379 } 3380 if (pcbddc->dbg_flag) { 3381 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3382 } 3383 3384 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3385 vertex_size = 1; 3386 if (pcbddc->user_provided_isfordofs) { 3387 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3388 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3389 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3390 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3391 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3392 } 3393 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3394 pcbddc->n_ISForDofs = 0; 3395 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3396 } 3397 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3398 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3399 } else { 3400 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3401 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3402 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3403 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3404 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3405 } 3406 } 3407 } 3408 3409 /* Setup of Graph */ 3410 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3411 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3412 } 3413 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3414 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3415 } 3416 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3417 3418 /* Graph's connected components analysis */ 3419 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3420 3421 /* print some info to stdout */ 3422 if (pcbddc->dbg_flag) { 3423 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3424 } 3425 3426 /* mark topography has done */ 3427 pcbddc->recompute_topography = PETSC_FALSE; 3428 PetscFunctionReturn(0); 3429 } 3430 3431 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3432 #undef __FUNCT__ 3433 #define __FUNCT__ "PCBDDCSubsetNumbering" 3434 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3435 { 3436 PetscSF sf; 3437 PetscLayout map; 3438 const PetscInt *idxs; 3439 PetscInt *leaf_data,*root_data,*gidxs; 3440 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3441 PetscInt n_n,nlocals,start,first_index; 3442 PetscMPIInt commsize; 3443 PetscBool first_found; 3444 PetscErrorCode ierr; 3445 3446 PetscFunctionBegin; 3447 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3448 if (subset_mult) { 3449 PetscCheckSameComm(subset,1,subset_mult,2); 3450 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3451 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3452 } 3453 /* create workspace layout for computing global indices of subset */ 3454 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3455 lbounds[0] = lbounds[1] = 0; 3456 for (i=0;i<n;i++) { 3457 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3458 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3459 } 3460 lbounds[0] = -lbounds[0]; 3461 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3462 gbounds[0] = -gbounds[0]; 3463 N = gbounds[1] - gbounds[0] + 1; 3464 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3465 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3466 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3467 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3468 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3469 3470 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3471 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3472 if (subset_mult) { 3473 const PetscInt* idxs_mult; 3474 3475 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3476 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3477 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3478 } else { 3479 for (i=0;i<n;i++) leaf_data[i] = 1; 3480 } 3481 /* local size of new subset */ 3482 n_n = 0; 3483 for (i=0;i<n;i++) n_n += leaf_data[i]; 3484 3485 /* global indexes in layout */ 3486 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3487 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3488 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3489 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3490 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3491 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3492 3493 /* reduce from leaves to roots */ 3494 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3495 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3496 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3497 3498 /* count indexes in local part of layout */ 3499 nlocals = 0; 3500 first_index = -1; 3501 first_found = PETSC_FALSE; 3502 for (i=0;i<Nl;i++) { 3503 if (!first_found && root_data[i]) { 3504 first_found = PETSC_TRUE; 3505 first_index = i; 3506 } 3507 nlocals += root_data[i]; 3508 } 3509 3510 /* cumulative of number of indexes and size of subset without holes */ 3511 #if defined(PETSC_HAVE_MPI_EXSCAN) 3512 start = 0; 3513 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3514 #else 3515 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3516 start = start-nlocals; 3517 #endif 3518 3519 if (N_n) { /* compute total size of new subset if requested */ 3520 *N_n = start + nlocals; 3521 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3522 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3523 } 3524 3525 /* adapt root data with cumulative */ 3526 if (first_found) { 3527 PetscInt old_index; 3528 3529 root_data[first_index] += start; 3530 old_index = first_index; 3531 for (i=first_index+1;i<Nl;i++) { 3532 if (root_data[i]) { 3533 root_data[i] += root_data[old_index]; 3534 old_index = i; 3535 } 3536 } 3537 } 3538 3539 /* from roots to leaves */ 3540 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3541 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3542 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3543 3544 /* create new IS with global indexes without holes */ 3545 if (subset_mult) { 3546 const PetscInt* idxs_mult; 3547 PetscInt cum; 3548 3549 cum = 0; 3550 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3551 for (i=0;i<n;i++) { 3552 PetscInt j; 3553 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3554 } 3555 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3556 } else { 3557 for (i=0;i<n;i++) { 3558 gidxs[i] = leaf_data[i]-1; 3559 } 3560 } 3561 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3562 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3563 PetscFunctionReturn(0); 3564 } 3565 3566 #undef __FUNCT__ 3567 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3568 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3569 { 3570 PetscInt i,j; 3571 PetscScalar *alphas; 3572 PetscErrorCode ierr; 3573 3574 PetscFunctionBegin; 3575 /* this implements stabilized Gram-Schmidt */ 3576 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3577 for (i=0;i<n;i++) { 3578 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3579 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3580 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3581 } 3582 ierr = PetscFree(alphas);CHKERRQ(ierr); 3583 PetscFunctionReturn(0); 3584 } 3585 3586 #undef __FUNCT__ 3587 #define __FUNCT__ "MatISGetSubassemblingPattern" 3588 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3589 { 3590 IS ranks_send_to; 3591 PetscInt n_neighs,*neighs,*n_shared,**shared; 3592 PetscMPIInt size,rank,color; 3593 PetscInt *xadj,*adjncy; 3594 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3595 PetscInt i,local_size,threshold=0; 3596 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3597 PetscSubcomm subcomm; 3598 PetscErrorCode ierr; 3599 3600 PetscFunctionBegin; 3601 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3602 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3603 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3604 3605 /* Get info on mapping */ 3606 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3607 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3608 3609 /* build local CSR graph of subdomains' connectivity */ 3610 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3611 xadj[0] = 0; 3612 xadj[1] = PetscMax(n_neighs-1,0); 3613 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3614 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3615 3616 if (threshold) { 3617 PetscInt xadj_count = 0; 3618 for (i=1;i<n_neighs;i++) { 3619 if (n_shared[i] > threshold) { 3620 adjncy[xadj_count] = neighs[i]; 3621 adjncy_wgt[xadj_count] = n_shared[i]; 3622 xadj_count++; 3623 } 3624 } 3625 xadj[1] = xadj_count; 3626 } else { 3627 if (xadj[1]) { 3628 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3629 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3630 } 3631 } 3632 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3633 if (use_square) { 3634 for (i=0;i<xadj[1];i++) { 3635 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3636 } 3637 } 3638 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3639 3640 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3641 3642 /* 3643 Restrict work on active processes only. 3644 */ 3645 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3646 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3647 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3648 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3649 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3650 if (color) { 3651 ierr = PetscFree(xadj);CHKERRQ(ierr); 3652 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3653 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3654 } else { 3655 Mat subdomain_adj; 3656 IS new_ranks,new_ranks_contig; 3657 MatPartitioning partitioner; 3658 PetscInt prank,rstart=0,rend=0; 3659 PetscInt *is_indices,*oldranks; 3660 PetscBool aggregate; 3661 3662 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3663 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3664 prank = rank; 3665 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3666 /* 3667 for (i=0;i<size;i++) { 3668 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3669 } 3670 */ 3671 for (i=0;i<xadj[1];i++) { 3672 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3673 } 3674 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3675 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3676 if (aggregate) { 3677 PetscInt lrows,row,ncols,*cols; 3678 PetscMPIInt nrank; 3679 PetscScalar *vals; 3680 3681 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3682 lrows = 0; 3683 if (nrank<redprocs) { 3684 lrows = size/redprocs; 3685 if (nrank<size%redprocs) lrows++; 3686 } 3687 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3688 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3689 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3690 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3691 row = nrank; 3692 ncols = xadj[1]-xadj[0]; 3693 cols = adjncy; 3694 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3695 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3696 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3697 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3698 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3699 ierr = PetscFree(xadj);CHKERRQ(ierr); 3700 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3701 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3702 ierr = PetscFree(vals);CHKERRQ(ierr); 3703 } else { 3704 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3705 } 3706 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3707 3708 /* Partition */ 3709 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3710 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3711 if (use_vwgt) { 3712 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3713 v_wgt[0] = local_size; 3714 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3715 } 3716 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3717 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3718 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3719 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3720 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3721 3722 /* renumber new_ranks to avoid "holes" in new set of processors */ 3723 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3724 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3725 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3726 if (!redprocs) { 3727 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3728 } else { 3729 PetscInt idxs[1]; 3730 PetscMPIInt tag; 3731 MPI_Request *reqs; 3732 3733 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3734 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3735 for (i=rstart;i<rend;i++) { 3736 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3737 } 3738 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3739 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3740 ierr = PetscFree(reqs);CHKERRQ(ierr); 3741 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3742 } 3743 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3744 /* clean up */ 3745 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3746 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3747 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3748 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3749 } 3750 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3751 3752 /* assemble parallel IS for sends */ 3753 i = 1; 3754 if (color) i=0; 3755 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3756 /* get back IS */ 3757 *is_sends = ranks_send_to; 3758 PetscFunctionReturn(0); 3759 } 3760 3761 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3762 3763 #undef __FUNCT__ 3764 #define __FUNCT__ "MatISSubassemble" 3765 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3766 { 3767 Mat local_mat; 3768 IS is_sends_internal; 3769 PetscInt rows,cols,new_local_rows; 3770 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3771 PetscBool ismatis,isdense,newisdense,destroy_mat; 3772 ISLocalToGlobalMapping l2gmap; 3773 PetscInt* l2gmap_indices; 3774 const PetscInt* is_indices; 3775 MatType new_local_type; 3776 /* buffers */ 3777 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3778 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3779 PetscInt *recv_buffer_idxs_local; 3780 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3781 /* MPI */ 3782 MPI_Comm comm,comm_n; 3783 PetscSubcomm subcomm; 3784 PetscMPIInt n_sends,n_recvs,commsize; 3785 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3786 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3787 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3788 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3789 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3790 PetscErrorCode ierr; 3791 3792 PetscFunctionBegin; 3793 /* TODO: add missing checks */ 3794 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3795 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3796 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3797 PetscValidLogicalCollectiveInt(mat,nis,7); 3798 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3799 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3800 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3801 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3802 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3803 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3804 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3805 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3806 PetscInt mrows,mcols,mnrows,mncols; 3807 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3808 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3809 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3810 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3811 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3812 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3813 } 3814 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3815 PetscValidLogicalCollectiveInt(mat,bs,0); 3816 /* prepare IS for sending if not provided */ 3817 if (!is_sends) { 3818 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3819 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3820 } else { 3821 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3822 is_sends_internal = is_sends; 3823 } 3824 3825 /* get comm */ 3826 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3827 3828 /* compute number of sends */ 3829 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3830 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3831 3832 /* compute number of receives */ 3833 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3834 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3835 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3836 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3837 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3838 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3839 ierr = PetscFree(iflags);CHKERRQ(ierr); 3840 3841 /* restrict comm if requested */ 3842 subcomm = 0; 3843 destroy_mat = PETSC_FALSE; 3844 if (restrict_comm) { 3845 PetscMPIInt color,subcommsize; 3846 3847 color = 0; 3848 if (restrict_full) { 3849 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3850 } else { 3851 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3852 } 3853 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3854 subcommsize = commsize - subcommsize; 3855 /* check if reuse has been requested */ 3856 if (reuse == MAT_REUSE_MATRIX) { 3857 if (*mat_n) { 3858 PetscMPIInt subcommsize2; 3859 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3860 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3861 comm_n = PetscObjectComm((PetscObject)*mat_n); 3862 } else { 3863 comm_n = PETSC_COMM_SELF; 3864 } 3865 } else { /* MAT_INITIAL_MATRIX */ 3866 PetscMPIInt rank; 3867 3868 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3869 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3870 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3871 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3872 comm_n = PetscSubcommChild(subcomm); 3873 } 3874 /* flag to destroy *mat_n if not significative */ 3875 if (color) destroy_mat = PETSC_TRUE; 3876 } else { 3877 comm_n = comm; 3878 } 3879 3880 /* prepare send/receive buffers */ 3881 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3882 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3883 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3884 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3885 if (nis) { 3886 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3887 } 3888 3889 /* Get data from local matrices */ 3890 if (!isdense) { 3891 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3892 /* TODO: See below some guidelines on how to prepare the local buffers */ 3893 /* 3894 send_buffer_vals should contain the raw values of the local matrix 3895 send_buffer_idxs should contain: 3896 - MatType_PRIVATE type 3897 - PetscInt size_of_l2gmap 3898 - PetscInt global_row_indices[size_of_l2gmap] 3899 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3900 */ 3901 } else { 3902 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3903 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3904 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3905 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3906 send_buffer_idxs[1] = i; 3907 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3908 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3909 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3910 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3911 for (i=0;i<n_sends;i++) { 3912 ilengths_vals[is_indices[i]] = len*len; 3913 ilengths_idxs[is_indices[i]] = len+2; 3914 } 3915 } 3916 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3917 /* additional is (if any) */ 3918 if (nis) { 3919 PetscMPIInt psum; 3920 PetscInt j; 3921 for (j=0,psum=0;j<nis;j++) { 3922 PetscInt plen; 3923 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3924 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3925 psum += len+1; /* indices + lenght */ 3926 } 3927 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3928 for (j=0,psum=0;j<nis;j++) { 3929 PetscInt plen; 3930 const PetscInt *is_array_idxs; 3931 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3932 send_buffer_idxs_is[psum] = plen; 3933 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3934 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3935 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3936 psum += plen+1; /* indices + lenght */ 3937 } 3938 for (i=0;i<n_sends;i++) { 3939 ilengths_idxs_is[is_indices[i]] = psum; 3940 } 3941 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3942 } 3943 3944 buf_size_idxs = 0; 3945 buf_size_vals = 0; 3946 buf_size_idxs_is = 0; 3947 for (i=0;i<n_recvs;i++) { 3948 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3949 buf_size_vals += (PetscInt)olengths_vals[i]; 3950 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3951 } 3952 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3953 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3954 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3955 3956 /* get new tags for clean communications */ 3957 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3958 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3959 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3960 3961 /* allocate for requests */ 3962 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3963 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3964 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3965 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3966 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3967 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3968 3969 /* communications */ 3970 ptr_idxs = recv_buffer_idxs; 3971 ptr_vals = recv_buffer_vals; 3972 ptr_idxs_is = recv_buffer_idxs_is; 3973 for (i=0;i<n_recvs;i++) { 3974 source_dest = onodes[i]; 3975 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3976 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3977 ptr_idxs += olengths_idxs[i]; 3978 ptr_vals += olengths_vals[i]; 3979 if (nis) { 3980 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3981 ptr_idxs_is += olengths_idxs_is[i]; 3982 } 3983 } 3984 for (i=0;i<n_sends;i++) { 3985 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3986 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3987 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3988 if (nis) { 3989 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3990 } 3991 } 3992 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3993 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3994 3995 /* assemble new l2g map */ 3996 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3997 ptr_idxs = recv_buffer_idxs; 3998 new_local_rows = 0; 3999 for (i=0;i<n_recvs;i++) { 4000 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4001 ptr_idxs += olengths_idxs[i]; 4002 } 4003 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4004 ptr_idxs = recv_buffer_idxs; 4005 new_local_rows = 0; 4006 for (i=0;i<n_recvs;i++) { 4007 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4008 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4009 ptr_idxs += olengths_idxs[i]; 4010 } 4011 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4012 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4013 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4014 4015 /* infer new local matrix type from received local matrices type */ 4016 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4017 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4018 if (n_recvs) { 4019 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4020 ptr_idxs = recv_buffer_idxs; 4021 for (i=0;i<n_recvs;i++) { 4022 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4023 new_local_type_private = MATAIJ_PRIVATE; 4024 break; 4025 } 4026 ptr_idxs += olengths_idxs[i]; 4027 } 4028 switch (new_local_type_private) { 4029 case MATDENSE_PRIVATE: 4030 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4031 new_local_type = MATSEQAIJ; 4032 bs = 1; 4033 } else { /* if I receive only 1 dense matrix */ 4034 new_local_type = MATSEQDENSE; 4035 bs = 1; 4036 } 4037 break; 4038 case MATAIJ_PRIVATE: 4039 new_local_type = MATSEQAIJ; 4040 bs = 1; 4041 break; 4042 case MATBAIJ_PRIVATE: 4043 new_local_type = MATSEQBAIJ; 4044 break; 4045 case MATSBAIJ_PRIVATE: 4046 new_local_type = MATSEQSBAIJ; 4047 break; 4048 default: 4049 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4050 break; 4051 } 4052 } else { /* by default, new_local_type is seqdense */ 4053 new_local_type = MATSEQDENSE; 4054 bs = 1; 4055 } 4056 4057 /* create MATIS object if needed */ 4058 if (reuse == MAT_INITIAL_MATRIX) { 4059 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4060 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4061 } else { 4062 /* it also destroys the local matrices */ 4063 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4064 } 4065 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4066 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4067 4068 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4069 4070 /* Global to local map of received indices */ 4071 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4072 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4073 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4074 4075 /* restore attributes -> type of incoming data and its size */ 4076 buf_size_idxs = 0; 4077 for (i=0;i<n_recvs;i++) { 4078 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4079 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4080 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4081 } 4082 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4083 4084 /* set preallocation */ 4085 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4086 if (!newisdense) { 4087 PetscInt *new_local_nnz=0; 4088 4089 ptr_vals = recv_buffer_vals; 4090 ptr_idxs = recv_buffer_idxs_local; 4091 if (n_recvs) { 4092 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4093 } 4094 for (i=0;i<n_recvs;i++) { 4095 PetscInt j; 4096 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4097 for (j=0;j<*(ptr_idxs+1);j++) { 4098 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4099 } 4100 } else { 4101 /* TODO */ 4102 } 4103 ptr_idxs += olengths_idxs[i]; 4104 } 4105 if (new_local_nnz) { 4106 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4107 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4108 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4109 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4110 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4111 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4112 } else { 4113 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4114 } 4115 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4116 } else { 4117 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4118 } 4119 4120 /* set values */ 4121 ptr_vals = recv_buffer_vals; 4122 ptr_idxs = recv_buffer_idxs_local; 4123 for (i=0;i<n_recvs;i++) { 4124 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4125 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4126 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4127 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4128 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4129 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4130 } else { 4131 /* TODO */ 4132 } 4133 ptr_idxs += olengths_idxs[i]; 4134 ptr_vals += olengths_vals[i]; 4135 } 4136 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4137 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4138 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4139 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4140 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4141 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4142 4143 #if 0 4144 if (!restrict_comm) { /* check */ 4145 Vec lvec,rvec; 4146 PetscReal infty_error; 4147 4148 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4149 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4150 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4151 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4152 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4153 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4154 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4155 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4156 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4157 } 4158 #endif 4159 4160 /* assemble new additional is (if any) */ 4161 if (nis) { 4162 PetscInt **temp_idxs,*count_is,j,psum; 4163 4164 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4165 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4166 ptr_idxs = recv_buffer_idxs_is; 4167 psum = 0; 4168 for (i=0;i<n_recvs;i++) { 4169 for (j=0;j<nis;j++) { 4170 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4171 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4172 psum += plen; 4173 ptr_idxs += plen+1; /* shift pointer to received data */ 4174 } 4175 } 4176 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4177 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4178 for (i=1;i<nis;i++) { 4179 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4180 } 4181 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4182 ptr_idxs = recv_buffer_idxs_is; 4183 for (i=0;i<n_recvs;i++) { 4184 for (j=0;j<nis;j++) { 4185 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4186 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4187 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4188 ptr_idxs += plen+1; /* shift pointer to received data */ 4189 } 4190 } 4191 for (i=0;i<nis;i++) { 4192 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4193 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4194 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4195 } 4196 ierr = PetscFree(count_is);CHKERRQ(ierr); 4197 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4198 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4199 } 4200 /* free workspace */ 4201 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4202 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4203 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4204 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4205 if (isdense) { 4206 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4207 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4208 } else { 4209 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4210 } 4211 if (nis) { 4212 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4213 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4214 } 4215 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4216 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4217 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4218 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4219 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4220 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4221 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4222 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4223 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4224 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4225 ierr = PetscFree(onodes);CHKERRQ(ierr); 4226 if (nis) { 4227 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4228 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4229 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4230 } 4231 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4232 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4233 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4234 for (i=0;i<nis;i++) { 4235 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4236 } 4237 *mat_n = NULL; 4238 } 4239 PetscFunctionReturn(0); 4240 } 4241 4242 /* temporary hack into ksp private data structure */ 4243 #include <petsc/private/kspimpl.h> 4244 4245 #undef __FUNCT__ 4246 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4247 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4248 { 4249 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4250 PC_IS *pcis = (PC_IS*)pc->data; 4251 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4252 MatNullSpace CoarseNullSpace=NULL; 4253 ISLocalToGlobalMapping coarse_islg; 4254 IS coarse_is,*isarray; 4255 PetscInt i,im_active=-1,active_procs=-1; 4256 PetscInt nis,nisdofs,nisneu; 4257 PC pc_temp; 4258 PCType coarse_pc_type; 4259 KSPType coarse_ksp_type; 4260 PetscBool multilevel_requested,multilevel_allowed; 4261 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4262 Mat t_coarse_mat_is; 4263 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4264 PetscMPIInt all_procs; 4265 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4266 PetscBool compute_vecs = PETSC_FALSE; 4267 PetscScalar *array; 4268 PetscErrorCode ierr; 4269 4270 PetscFunctionBegin; 4271 /* Assign global numbering to coarse dofs */ 4272 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4273 PetscInt ocoarse_size; 4274 compute_vecs = PETSC_TRUE; 4275 ocoarse_size = pcbddc->coarse_size; 4276 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4277 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4278 /* see if we can avoid some work */ 4279 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4280 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4281 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4282 PC pc; 4283 PetscBool isbddc; 4284 4285 /* temporary workaround since PCBDDC does not have a reset method so far */ 4286 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4287 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4288 if (isbddc) { 4289 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4290 } 4291 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4292 coarse_reuse = PETSC_FALSE; 4293 } else { /* we can safely reuse already computed coarse matrix */ 4294 coarse_reuse = PETSC_TRUE; 4295 } 4296 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4297 coarse_reuse = PETSC_FALSE; 4298 } 4299 /* reset any subassembling information */ 4300 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4301 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4302 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4303 coarse_reuse = PETSC_TRUE; 4304 } 4305 4306 /* count "active" (i.e. with positive local size) and "void" processes */ 4307 im_active = !!(pcis->n); 4308 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4309 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4310 void_procs = all_procs-active_procs; 4311 csin_type_simple = PETSC_TRUE; 4312 redist = PETSC_FALSE; 4313 if (pcbddc->current_level && void_procs) { 4314 csin_ml = PETSC_TRUE; 4315 ncoarse_ml = void_procs; 4316 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4317 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4318 csin_ds = PETSC_TRUE; 4319 ncoarse_ds = pcbddc->redistribute_coarse; 4320 redist = PETSC_TRUE; 4321 } else { 4322 csin_ds = PETSC_TRUE; 4323 ncoarse_ds = active_procs; 4324 redist = PETSC_TRUE; 4325 } 4326 } else { 4327 csin_ml = PETSC_FALSE; 4328 ncoarse_ml = all_procs; 4329 if (void_procs) { 4330 csin_ds = PETSC_TRUE; 4331 ncoarse_ds = void_procs; 4332 csin_type_simple = PETSC_FALSE; 4333 } else { 4334 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4335 csin_ds = PETSC_TRUE; 4336 ncoarse_ds = pcbddc->redistribute_coarse; 4337 redist = PETSC_TRUE; 4338 } else { 4339 csin_ds = PETSC_FALSE; 4340 ncoarse_ds = all_procs; 4341 } 4342 } 4343 } 4344 4345 /* 4346 test if we can go multilevel: three conditions must be satisfied: 4347 - we have not exceeded the number of levels requested 4348 - we can actually subassemble the active processes 4349 - we can find a suitable number of MPI processes where we can place the subassembled problem 4350 */ 4351 multilevel_allowed = PETSC_FALSE; 4352 multilevel_requested = PETSC_FALSE; 4353 if (pcbddc->current_level < pcbddc->max_levels) { 4354 multilevel_requested = PETSC_TRUE; 4355 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4356 multilevel_allowed = PETSC_FALSE; 4357 } else { 4358 multilevel_allowed = PETSC_TRUE; 4359 } 4360 } 4361 /* determine number of process partecipating to coarse solver */ 4362 if (multilevel_allowed) { 4363 ncoarse = ncoarse_ml; 4364 csin = csin_ml; 4365 redist = PETSC_FALSE; 4366 } else { 4367 ncoarse = ncoarse_ds; 4368 csin = csin_ds; 4369 } 4370 4371 /* creates temporary l2gmap and IS for coarse indexes */ 4372 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4373 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4374 4375 /* creates temporary MATIS object for coarse matrix */ 4376 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4377 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4378 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4379 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4380 #if 0 4381 { 4382 PetscViewer viewer; 4383 char filename[256]; 4384 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4385 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4386 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4387 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4388 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4389 } 4390 #endif 4391 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4392 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4393 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4394 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4395 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4396 4397 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4398 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4399 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4400 const PetscInt *idxs; 4401 ISLocalToGlobalMapping tmap; 4402 4403 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4404 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4405 /* allocate space for temporary storage */ 4406 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4407 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4408 /* allocate for IS array */ 4409 nisdofs = pcbddc->n_ISForDofsLocal; 4410 nisneu = !!pcbddc->NeumannBoundariesLocal; 4411 nis = nisdofs + nisneu; 4412 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4413 /* dofs splitting */ 4414 for (i=0;i<nisdofs;i++) { 4415 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4416 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4417 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4418 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4419 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4420 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4421 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4422 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4423 } 4424 /* neumann boundaries */ 4425 if (pcbddc->NeumannBoundariesLocal) { 4426 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4427 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4428 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4429 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4430 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4431 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4432 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4433 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4434 } 4435 /* free memory */ 4436 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4437 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4438 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4439 } else { 4440 nis = 0; 4441 nisdofs = 0; 4442 nisneu = 0; 4443 isarray = NULL; 4444 } 4445 /* destroy no longer needed map */ 4446 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4447 4448 /* restrict on coarse candidates (if needed) */ 4449 coarse_mat_is = NULL; 4450 if (csin) { 4451 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4452 if (redist) { 4453 PetscMPIInt rank; 4454 PetscInt spc,n_spc_p1,dest[1],destsize; 4455 4456 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4457 spc = active_procs/ncoarse; 4458 n_spc_p1 = active_procs%ncoarse; 4459 if (im_active) { 4460 destsize = 1; 4461 if (rank > n_spc_p1*(spc+1)-1) { 4462 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4463 } else { 4464 dest[0] = rank/(spc+1); 4465 } 4466 } else { 4467 destsize = 0; 4468 } 4469 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4470 } else if (csin_type_simple) { 4471 PetscMPIInt rank; 4472 PetscInt issize,isidx; 4473 4474 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4475 if (im_active) { 4476 issize = 1; 4477 isidx = (PetscInt)rank; 4478 } else { 4479 issize = 0; 4480 isidx = -1; 4481 } 4482 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4483 } else { /* get a suitable subassembling pattern from MATIS code */ 4484 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4485 } 4486 4487 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4488 if (!redist || ncoarse <= void_procs) { 4489 PetscInt ncoarse_cand,tissize,*nisindices; 4490 PetscInt *coarse_candidates; 4491 const PetscInt* tisindices; 4492 4493 /* get coarse candidates' ranks in pc communicator */ 4494 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4495 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4496 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4497 if (!coarse_candidates[i]) { 4498 coarse_candidates[ncoarse_cand++]=i; 4499 } 4500 } 4501 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4502 4503 4504 if (pcbddc->dbg_flag) { 4505 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4506 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4507 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4508 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4509 for (i=0;i<ncoarse_cand;i++) { 4510 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4511 } 4512 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4513 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4514 } 4515 /* shift the pattern on coarse candidates */ 4516 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4517 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4518 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4519 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4520 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4521 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4522 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4523 } 4524 if (pcbddc->dbg_flag) { 4525 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4526 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4527 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4528 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4529 } 4530 } 4531 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4532 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4533 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4534 } else { /* this is the last level, so use just receiving processes in subcomm */ 4535 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4536 } 4537 } else { 4538 if (pcbddc->dbg_flag) { 4539 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4540 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4541 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4542 } 4543 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4544 coarse_mat_is = t_coarse_mat_is; 4545 } 4546 4547 /* create local to global scatters for coarse problem */ 4548 if (compute_vecs) { 4549 PetscInt lrows; 4550 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4551 if (coarse_mat_is) { 4552 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4553 } else { 4554 lrows = 0; 4555 } 4556 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4557 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4558 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4559 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4560 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4561 } 4562 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4563 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4564 4565 /* set defaults for coarse KSP and PC */ 4566 if (multilevel_allowed) { 4567 coarse_ksp_type = KSPRICHARDSON; 4568 coarse_pc_type = PCBDDC; 4569 } else { 4570 coarse_ksp_type = KSPPREONLY; 4571 coarse_pc_type = PCREDUNDANT; 4572 } 4573 4574 /* print some info if requested */ 4575 if (pcbddc->dbg_flag) { 4576 if (!multilevel_allowed) { 4577 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4578 if (multilevel_requested) { 4579 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4580 } else if (pcbddc->max_levels) { 4581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4582 } 4583 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4584 } 4585 } 4586 4587 /* create the coarse KSP object only once with defaults */ 4588 if (coarse_mat_is) { 4589 MatReuse coarse_mat_reuse; 4590 PetscViewer dbg_viewer = NULL; 4591 if (pcbddc->dbg_flag) { 4592 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4593 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4594 } 4595 if (!pcbddc->coarse_ksp) { 4596 char prefix[256],str_level[16]; 4597 size_t len; 4598 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4599 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4600 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4601 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4602 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4603 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4604 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4605 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4606 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4607 /* prefix */ 4608 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4609 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4610 if (!pcbddc->current_level) { 4611 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4612 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4613 } else { 4614 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4615 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4616 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4617 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4618 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4619 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4620 } 4621 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4622 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4623 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4624 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4625 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4626 /* allow user customization */ 4627 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4628 } 4629 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4630 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4631 if (nisdofs) { 4632 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4633 for (i=0;i<nisdofs;i++) { 4634 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4635 } 4636 } 4637 if (nisneu) { 4638 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4639 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4640 } 4641 4642 /* get some info after set from options */ 4643 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4644 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4645 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4646 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4647 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4648 isbddc = PETSC_FALSE; 4649 } 4650 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4651 if (isredundant) { 4652 KSP inner_ksp; 4653 PC inner_pc; 4654 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4655 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4656 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4657 } 4658 4659 /* assemble coarse matrix */ 4660 if (coarse_reuse) { 4661 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4662 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4663 coarse_mat_reuse = MAT_REUSE_MATRIX; 4664 } else { 4665 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4666 } 4667 if (isbddc || isnn) { 4668 if (pcbddc->coarsening_ratio > 1) { 4669 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4670 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4671 if (pcbddc->dbg_flag) { 4672 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4673 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4674 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4675 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4676 } 4677 } 4678 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4679 } else { 4680 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4681 coarse_mat = coarse_mat_is; 4682 } 4683 } else { 4684 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4685 } 4686 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4687 4688 /* propagate symmetry info of coarse matrix */ 4689 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4690 if (pc->pmat->symmetric_set) { 4691 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4692 } 4693 if (pc->pmat->hermitian_set) { 4694 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4695 } 4696 if (pc->pmat->spd_set) { 4697 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4698 } 4699 /* set operators */ 4700 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4701 if (pcbddc->dbg_flag) { 4702 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4703 } 4704 } else { /* processes non partecipating to coarse solver (if any) */ 4705 coarse_mat = 0; 4706 } 4707 ierr = PetscFree(isarray);CHKERRQ(ierr); 4708 #if 0 4709 { 4710 PetscViewer viewer; 4711 char filename[256]; 4712 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4713 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4714 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4715 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4716 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4717 } 4718 #endif 4719 4720 /* Compute coarse null space (special handling by BDDC only) */ 4721 #if 0 4722 if (pcbddc->NullSpace) { 4723 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4724 } 4725 #endif 4726 4727 if (pcbddc->coarse_ksp) { 4728 Vec crhs,csol; 4729 PetscBool ispreonly; 4730 4731 if (CoarseNullSpace) { 4732 if (isbddc) { 4733 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4734 } else { 4735 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4736 } 4737 } 4738 /* setup coarse ksp */ 4739 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4740 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4741 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4742 /* hack */ 4743 if (!csol) { 4744 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4745 } 4746 if (!crhs) { 4747 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4748 } 4749 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4750 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4751 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4752 KSP check_ksp; 4753 KSPType check_ksp_type; 4754 PC check_pc; 4755 Vec check_vec,coarse_vec; 4756 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4757 PetscInt its; 4758 PetscBool compute_eigs; 4759 PetscReal *eigs_r,*eigs_c; 4760 PetscInt neigs; 4761 const char *prefix; 4762 4763 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4764 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4765 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4766 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4767 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4768 if (ispreonly) { 4769 check_ksp_type = KSPPREONLY; 4770 compute_eigs = PETSC_FALSE; 4771 } else { 4772 check_ksp_type = KSPGMRES; 4773 compute_eigs = PETSC_TRUE; 4774 } 4775 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4776 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4777 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4778 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4779 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4780 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4781 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4782 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4783 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4784 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4785 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4786 /* create random vec */ 4787 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4788 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4789 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4790 if (CoarseNullSpace) { 4791 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4792 } 4793 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4794 /* solve coarse problem */ 4795 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4796 if (CoarseNullSpace) { 4797 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4798 } 4799 /* set eigenvalue estimation if preonly has not been requested */ 4800 if (compute_eigs) { 4801 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4802 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4803 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4804 lambda_max = eigs_r[neigs-1]; 4805 lambda_min = eigs_r[0]; 4806 if (pcbddc->use_coarse_estimates) { 4807 if (lambda_max>lambda_min) { 4808 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4809 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4810 } 4811 } 4812 } 4813 4814 /* check coarse problem residual error */ 4815 if (pcbddc->dbg_flag) { 4816 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4817 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4818 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4819 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4820 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4821 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4822 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4823 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4824 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4825 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4826 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4827 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4828 if (compute_eigs) { 4829 PetscReal lambda_max_s,lambda_min_s; 4830 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4831 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4832 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4833 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4834 for (i=0;i<neigs;i++) { 4835 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4836 } 4837 } 4838 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4839 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4840 } 4841 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4842 if (compute_eigs) { 4843 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4844 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4845 } 4846 } 4847 } 4848 /* print additional info */ 4849 if (pcbddc->dbg_flag) { 4850 /* waits until all processes reaches this point */ 4851 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4852 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4853 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4854 } 4855 4856 /* free memory */ 4857 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4858 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4859 PetscFunctionReturn(0); 4860 } 4861 4862 #undef __FUNCT__ 4863 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4864 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4865 { 4866 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4867 PC_IS* pcis = (PC_IS*)pc->data; 4868 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4869 IS subset,subset_mult,subset_n; 4870 PetscInt local_size,coarse_size=0; 4871 PetscInt *local_primal_indices=NULL; 4872 const PetscInt *t_local_primal_indices; 4873 PetscErrorCode ierr; 4874 4875 PetscFunctionBegin; 4876 /* Compute global number of coarse dofs */ 4877 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4878 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4879 } 4880 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4881 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4882 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4883 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4884 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4885 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4886 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4887 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4888 if (local_size != pcbddc->local_primal_size) { 4889 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4890 } 4891 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4892 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4893 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4894 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4895 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4896 4897 /* check numbering */ 4898 if (pcbddc->dbg_flag) { 4899 PetscScalar coarsesum,*array,*array2; 4900 PetscInt i; 4901 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4902 4903 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4904 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4905 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4906 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4907 /* counter */ 4908 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4909 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4910 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4911 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4912 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4913 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4914 4915 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4916 for (i=0;i<pcbddc->local_primal_size;i++) { 4917 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4918 } 4919 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4920 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4921 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4922 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4923 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4924 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4925 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4926 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4927 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4928 for (i=0;i<pcis->n;i++) { 4929 if (array[i] != 0.0 && array[i] != array2[i]) { 4930 PetscInt owned = (PetscInt)(array[i]); 4931 PetscInt neigh = (PetscInt)(array2[i]); 4932 set_error = PETSC_TRUE; 4933 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4934 } 4935 } 4936 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4937 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4938 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4939 for (i=0;i<pcis->n;i++) { 4940 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4941 } 4942 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4943 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4944 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4945 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4946 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4947 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4948 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4949 PetscInt *gidxs; 4950 4951 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4952 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4954 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4956 for (i=0;i<pcbddc->local_primal_size;i++) { 4957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4958 } 4959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4960 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4961 } 4962 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4963 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4964 } 4965 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4966 /* get back data */ 4967 *coarse_size_n = coarse_size; 4968 *local_primal_indices_n = local_primal_indices; 4969 PetscFunctionReturn(0); 4970 } 4971 4972 #undef __FUNCT__ 4973 #define __FUNCT__ "PCBDDCGlobalToLocal" 4974 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4975 { 4976 IS localis_t; 4977 PetscInt i,lsize,*idxs,n; 4978 PetscScalar *vals; 4979 PetscErrorCode ierr; 4980 4981 PetscFunctionBegin; 4982 /* get indices in local ordering exploiting local to global map */ 4983 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4984 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4985 for (i=0;i<lsize;i++) vals[i] = 1.0; 4986 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4987 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4988 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4989 if (idxs) { /* multilevel guard */ 4990 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4991 } 4992 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4993 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4994 ierr = PetscFree(vals);CHKERRQ(ierr); 4995 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4996 /* now compute set in local ordering */ 4997 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4998 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4999 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5000 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5001 for (i=0,lsize=0;i<n;i++) { 5002 if (PetscRealPart(vals[i]) > 0.5) { 5003 lsize++; 5004 } 5005 } 5006 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5007 for (i=0,lsize=0;i<n;i++) { 5008 if (PetscRealPart(vals[i]) > 0.5) { 5009 idxs[lsize++] = i; 5010 } 5011 } 5012 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5013 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5014 *localis = localis_t; 5015 PetscFunctionReturn(0); 5016 } 5017 5018 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5019 #undef __FUNCT__ 5020 #define __FUNCT__ "PCBDDCMatMult_Private" 5021 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5022 { 5023 PCBDDCChange_ctx change_ctx; 5024 PetscErrorCode ierr; 5025 5026 PetscFunctionBegin; 5027 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5028 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5029 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5030 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5031 PetscFunctionReturn(0); 5032 } 5033 5034 #undef __FUNCT__ 5035 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5036 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5037 { 5038 PCBDDCChange_ctx change_ctx; 5039 PetscErrorCode ierr; 5040 5041 PetscFunctionBegin; 5042 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5043 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5044 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5045 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5046 PetscFunctionReturn(0); 5047 } 5048 5049 #undef __FUNCT__ 5050 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5051 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5052 { 5053 PC_IS *pcis=(PC_IS*)pc->data; 5054 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5055 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5056 Mat S_j; 5057 PetscInt *used_xadj,*used_adjncy; 5058 PetscBool free_used_adj; 5059 PetscErrorCode ierr; 5060 5061 PetscFunctionBegin; 5062 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5063 free_used_adj = PETSC_FALSE; 5064 if (pcbddc->sub_schurs_layers == -1) { 5065 used_xadj = NULL; 5066 used_adjncy = NULL; 5067 } else { 5068 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5069 used_xadj = pcbddc->mat_graph->xadj; 5070 used_adjncy = pcbddc->mat_graph->adjncy; 5071 } else if (pcbddc->computed_rowadj) { 5072 used_xadj = pcbddc->mat_graph->xadj; 5073 used_adjncy = pcbddc->mat_graph->adjncy; 5074 } else { 5075 PetscBool flg_row=PETSC_FALSE; 5076 const PetscInt *xadj,*adjncy; 5077 PetscInt nvtxs; 5078 5079 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5080 if (flg_row) { 5081 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5082 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5083 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5084 free_used_adj = PETSC_TRUE; 5085 } else { 5086 pcbddc->sub_schurs_layers = -1; 5087 used_xadj = NULL; 5088 used_adjncy = NULL; 5089 } 5090 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5091 } 5092 } 5093 5094 /* setup sub_schurs data */ 5095 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5096 if (!sub_schurs->use_mumps) { 5097 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5098 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5099 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5100 } else { 5101 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5102 PetscBool isseqaij; 5103 if (!pcbddc->use_vertices && reuse_solvers) { 5104 PetscInt n_vertices; 5105 5106 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5107 reuse_solvers = (PetscBool)!n_vertices; 5108 } 5109 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5110 if (!isseqaij) { 5111 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5112 if (matis->A == pcbddc->local_mat) { 5113 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5114 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5115 } else { 5116 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5117 } 5118 } 5119 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5120 } 5121 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5122 5123 /* free adjacency */ 5124 if (free_used_adj) { 5125 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5126 } 5127 PetscFunctionReturn(0); 5128 } 5129 5130 #undef __FUNCT__ 5131 #define __FUNCT__ "PCBDDCInitSubSchurs" 5132 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5133 { 5134 PC_IS *pcis=(PC_IS*)pc->data; 5135 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5136 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5137 PCBDDCGraph graph; 5138 PetscErrorCode ierr; 5139 5140 PetscFunctionBegin; 5141 /* attach interface graph for determining subsets */ 5142 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5143 IS verticesIS,verticescomm; 5144 PetscInt vsize,*idxs; 5145 5146 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5147 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5148 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5149 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5150 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5151 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5152 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5153 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5154 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5155 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5156 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5157 /* 5158 if (pcbddc->dbg_flag) { 5159 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5160 } 5161 */ 5162 } else { 5163 graph = pcbddc->mat_graph; 5164 } 5165 5166 /* sub_schurs init */ 5167 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5168 5169 /* free graph struct */ 5170 if (pcbddc->sub_schurs_rebuild) { 5171 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5172 } 5173 PetscFunctionReturn(0); 5174 } 5175